Fix: Move pagerank field from create to update dataset API (#8217)

### What problem does this PR solve?

- Remove pagerank from CreateDatasetReq and add to UpdateDatasetReq
- Add pagerank update logic in dataset update endpoint
- Update API documentation to reflect changes
- Modify related test cases and SDK references

#8208

This change makes pagerank a mutable property that can only be set after
dataset creation, and only when using elasticsearch as the doc engine.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-06-12 15:47:49 +08:00
committed by GitHub
parent d0c5ff04a6
commit 7fbbc9650d
9 changed files with 78 additions and 109 deletions

View File

@ -394,51 +394,6 @@ class TestDatasetCreate:
assert res["code"] == 101, res
assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
@pytest.mark.p2
@pytest.mark.parametrize(
"name, pagerank",
[
("pagerank_min", 0),
("pagerank_mid", 50),
("pagerank_max", 100),
],
ids=["min", "mid", "max"],
)
def test_pagerank(self, HttpApiAuth, name, pagerank):
payload = {"name": name, "pagerank": pagerank}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 0, res
assert res["data"]["pagerank"] == pagerank, res
@pytest.mark.p3
@pytest.mark.parametrize(
"name, pagerank, expected_message",
[
("pagerank_min_limit", -1, "Input should be greater than or equal to 0"),
("pagerank_max_limit", 101, "Input should be less than or equal to 100"),
],
ids=["min_limit", "max_limit"],
)
def test_pagerank_invalid(self, HttpApiAuth, name, pagerank, expected_message):
payload = {"name": name, "pagerank": pagerank}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 101, res
assert expected_message in res["message"], res
@pytest.mark.p3
def test_pagerank_unset(self, HttpApiAuth):
payload = {"name": "pagerank_unset"}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 0, res
assert res["data"]["pagerank"] == 0, res
@pytest.mark.p3
def test_pagerank_none(self, HttpApiAuth):
payload = {"name": "pagerank_unset", "pagerank": None}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 101, res
assert "Input should be a valid integer" in res["message"], res
@pytest.mark.p1
@pytest.mark.parametrize(
"name, parser_config",
@ -730,6 +685,7 @@ class TestDatasetCreate:
{"name": "chunk_count", "chunk_count": 1},
{"name": "token_num", "token_num": 1},
{"name": "status", "status": "1"},
{"name": "pagerank", "pagerank": 50},
{"name": "unknown_field", "unknown_field": "unknown_field"},
],
)

View File

@ -13,11 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, list_datasets, update_dataset
from common import list_datasets, update_dataset
from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
from hypothesis import HealthCheck, example, given, settings
from libs.auth import RAGFlowHttpApiAuth
from utils import encode_avatar
@ -155,10 +157,10 @@ class TestDatasetUpdate:
@pytest.mark.p3
def test_name_duplicated(self, HttpApiAuth, add_datasets_func):
dataset_ids = add_datasets_func[0]
dataset_id = add_datasets_func[0]
name = "dataset_1"
payload = {"name": name}
res = update_dataset(HttpApiAuth, dataset_ids, payload)
res = update_dataset(HttpApiAuth, dataset_id, payload)
assert res["code"] == 102, res
assert res["message"] == f"Dataset name '{name}' already exists", res
@ -425,6 +427,7 @@ class TestDatasetUpdate:
assert res["code"] == 101, res
assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
@pytest.mark.p2
@pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
def test_pagerank(self, HttpApiAuth, add_dataset_func, pagerank):
@ -437,6 +440,35 @@ class TestDatasetUpdate:
assert res["code"] == 0, res
assert res["data"][0]["pagerank"] == pagerank
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
@pytest.mark.p2
def test_pagerank_set_to_0(self, HttpApiAuth, add_dataset_func):
dataset_id = add_dataset_func
payload = {"pagerank": 50}
res = update_dataset(HttpApiAuth, dataset_id, payload)
assert res["code"] == 0, res
res = list_datasets(HttpApiAuth, {"id": dataset_id})
assert res["code"] == 0, res
assert res["data"][0]["pagerank"] == 50, res
payload = {"pagerank": 0}
res = update_dataset(HttpApiAuth, dataset_id, payload)
assert res["code"] == 0
res = list_datasets(HttpApiAuth, {"id": dataset_id})
assert res["code"] == 0, res
assert res["data"][0]["pagerank"] == 0, res
@pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="#8208")
@pytest.mark.p2
def test_pagerank_infinity(self, HttpApiAuth, add_dataset_func):
dataset_id = add_dataset_func
payload = {"pagerank": 50}
res = update_dataset(HttpApiAuth, dataset_id, payload)
assert res["code"] == 101, res
assert res["message"] == "'pagerank' can only be set when doc_engine is elasticsearch", res
@pytest.mark.p2
@pytest.mark.parametrize(
"pagerank, expected_message",