Fix: Move pagerank field from create to update dataset API (#8217)

### What problem does this PR solve?

- Remove pagerank from CreateDatasetReq and add it to UpdateDatasetReq
- Add pagerank update logic in dataset update endpoint
- Update API documentation to reflect changes
- Modify related test cases and SDK references

#8208

This change makes pagerank a mutable property that can only be set after
dataset creation, and only when Elasticsearch is used as the doc engine.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-06-12 15:47:49 +08:00
committed by GitHub
parent d0c5ff04a6
commit 7fbbc9650d
9 changed files with 78 additions and 109 deletions

View File

@ -344,49 +344,6 @@ class TestDatasetCreate:
client.create_dataset(**payload)
assert "not instance of" in str(excinfo.value), str(excinfo.value)
@pytest.mark.p2
@pytest.mark.parametrize(
"name, pagerank",
[
("pagerank_min", 0),
("pagerank_mid", 50),
("pagerank_max", 100),
],
ids=["min", "mid", "max"],
)
def test_pagerank(self, client, name, pagerank):
payload = {"name": name, "pagerank": pagerank}
dataset = client.create_dataset(**payload)
assert dataset.pagerank == pagerank, str(dataset)
@pytest.mark.p3
@pytest.mark.parametrize(
"name, pagerank, expected_message",
[
("pagerank_min_limit", -1, "Input should be greater than or equal to 0"),
("pagerank_max_limit", 101, "Input should be less than or equal to 100"),
],
ids=["min_limit", "max_limit"],
)
def test_pagerank_invalid(self, client, name, pagerank, expected_message):
payload = {"name": name, "pagerank": pagerank}
with pytest.raises(Exception) as excinfo:
client.create_dataset(**payload)
assert expected_message in str(excinfo.value), str(excinfo.value)
@pytest.mark.p3
def test_pagerank_unset(self, client):
payload = {"name": "pagerank_unset"}
dataset = client.create_dataset(**payload)
assert dataset.pagerank == 0, str(dataset)
@pytest.mark.p3
def test_pagerank_none(self, client):
payload = {"name": "pagerank_unset", "pagerank": None}
with pytest.raises(Exception) as excinfo:
client.create_dataset(**payload)
assert "not instance of" in str(excinfo.value), str(excinfo.value)
@pytest.mark.p1
@pytest.mark.parametrize(
"name, parser_config",
@ -689,6 +646,7 @@ class TestDatasetCreate:
{"name": "chunk_count", "chunk_count": 1},
{"name": "token_num", "token_num": 1},
{"name": "status", "status": "1"},
{"name": "pagerank", "pagerank": 50},
{"name": "unknown_field", "unknown_field": "unknown_field"},
],
)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from operator import attrgetter
@ -324,6 +325,7 @@ class TestDatasetUpdate:
dataset.update({"chunk_method": None})
assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in str(excinfo.value), str(excinfo.value)
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
@pytest.mark.p2
@pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
def test_pagerank(self, client, add_dataset_func, pagerank):
@ -334,6 +336,30 @@ class TestDatasetUpdate:
retrieved_dataset = client.get_dataset(name=dataset.name)
assert retrieved_dataset.pagerank == pagerank, str(retrieved_dataset)
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
@pytest.mark.p2
def test_pagerank_set_to_0(self, client, add_dataset_func):
dataset = add_dataset_func
dataset.update({"pagerank": 50})
assert dataset.pagerank == 50, str(dataset)
retrieved_dataset = client.get_dataset(name=dataset.name)
assert retrieved_dataset.pagerank == 50, str(retrieved_dataset)
dataset.update({"pagerank": 0})
assert dataset.pagerank == 0, str(dataset)
retrieved_dataset = client.get_dataset(name=dataset.name)
assert retrieved_dataset.pagerank == 0, str(retrieved_dataset)
@pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="#8208")
@pytest.mark.p2
def test_pagerank_infinity(self, client, add_dataset_func):
dataset = add_dataset_func
with pytest.raises(Exception) as excinfo:
dataset.update({"pagerank": 50})
assert "'pagerank' can only be set when doc_engine is elasticsearch" in str(excinfo.value), str(excinfo.value)
@pytest.mark.p2
@pytest.mark.parametrize(
"pagerank, expected_message",