Refa: HTTP API delete dataset / test cases / docs (#7657)

### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP API, improving code clarity and robustness. Key changes include:

1. Pydantic Validation
2. Error Handling
3. Test Updates
4. Documentation Updates

### Type of change

- [x] Documentation Update
- [x] Refactoring
2026-01-31 23:55:06 +08:00 · 2025-05-16 10:16:43 +08:00
parent 0e9ff8c1f7
commit ae8b628f0a
8 changed files with 341 additions and 173 deletions
--- a/sdk/python/test/test_http_api/conftest.py
+++ b/sdk/python/test/test_http_api/conftest.py
@ -76,7 +76,7 @@ def condition(_auth, _dataset_id):
@pytest.fixture(scope="function")
 def clear_datasets(request, get_http_api_auth):
    def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@ -132,7 +132,7 @@ def ragflow_tmp_dir(request, tmp_path_factory):
@pytest.fixture(scope="class")
 def add_dataset(request, get_http_api_auth):
    def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@ -143,12 +143,11 @@ def add_dataset(request, get_http_api_auth):
@pytest.fixture(scope="function")
 def add_dataset_func(request, get_http_api_auth):
    def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

-    dataset_ids = batch_create_datasets(get_http_api_auth, 1)
-    return dataset_ids[0]
+    return batch_create_datasets(get_http_api_auth, 1)[0]


@pytest.fixture(scope="class")
--- a/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py
@ -22,7 +22,7 @@ from common import batch_create_datasets, delete_datasets
@pytest.fixture(scope="class")
 def add_datasets(get_http_api_auth, request):
    def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@ -32,18 +32,8 @@ def add_datasets(get_http_api_auth, request):
@pytest.fixture(scope="function")
 def add_datasets_func(get_http_api_auth, request):
    def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 3)
-
-
-@pytest.fixture(scope="function")
-def add_dataset_func(get_http_api_auth, request):
-    def cleanup():
-        delete_datasets(get_http_api_auth)
-
-    request.addfinalizer(cleanup)
-
-    return batch_create_datasets(get_http_api_auth, 1)[0]
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py
@ -25,8 +25,8 @@ from common import (
 from libs.auth import RAGFlowHttpApiAuth


-@pytest.mark.p1
 class TestAuthorization:
+    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
@ -38,104 +38,173 @@ class TestAuthorization:
            ),
        ],
    )
-    def test_invalid_auth(self, auth, expected_code, expected_message):
+    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = delete_datasets(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


-class TestDatasetsDeletion:
-    @pytest.mark.p1
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, get_http_api_auth):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
    @pytest.mark.parametrize(
-        "payload, expected_code, expected_message, remaining",
+        "payload, expected_message",
        [
-            (None, 0, "", 0),
-            ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
-            (
-                {"ids": ["\n!?。；！？\"'"]},
-                102,
-                "You don't own the dataset \n!?。；！？\"'",
-                3,
-            ),
-            (
-                "not json",
-                100,
-                "AttributeError(\"'str' object has no attribute 'get'\")",
-                3,
-            ),
-            (lambda r: {"ids": r[:1]}, 0, "", 2),
-            (lambda r: {"ids": r}, 0, "", 0),
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
    )
-    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == expected_code
-        if res["code"] != 0:
-            assert res["message"] == expected_message
+    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
+        res = delete_datasets(get_http_api_auth, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth):
+        res = delete_datasets(get_http_api_auth, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, get_http_api_auth):
+        ids = batch_create_datasets(get_http_api_auth, 1_000)
+        res = delete_datasets(get_http_api_auth, {"ids": ids})
+        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == remaining
-
-    @pytest.mark.p2
-    @pytest.mark.parametrize(
-        "payload",
-        [
-            lambda r: {"ids": ["invalid_id"] + r},
-            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
-            lambda r: {"ids": r + ["invalid_id"]},
-        ],
-    )
-    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
-
-    @pytest.mark.p2
-    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 0
-
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 102
-        assert "You don't own the dataset" in res["message"]
-
-    @pytest.mark.p2
-    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
-        assert res["code"] == 0
-        assert "Duplicate dataset ids" in res["data"]["errors"][0]
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == 0, res

    @pytest.mark.p3
    def test_concurrent_deletion(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 100)
+        dataset_num = 1_000
+        ids = batch_create_datasets(get_http_api_auth, dataset_num)

        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
+            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        assert all(r["code"] == 0 for r in responses), responses

-    @pytest.mark.p3
-    def test_delete_10k(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 10_000)
-        res = delete_datasets(get_http_api_auth, {"ids": ids})
-        assert res["code"] == 0
+
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, expected_code, expected_message, remaining",
+        [
+            (lambda r: {"ids": r[:1]}, 0, "", 2),
+            (lambda r: {"ids": r}, 0, "", 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == expected_code, res

        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == remaining, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, get_http_api_auth):
+        payload = {"ids": []}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, get_http_api_auth):
+        payload = {"ids": None}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, get_http_api_auth):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid UUID" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, get_http_api_auth):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func",
+        [
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
+        ],
+    )
+    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, get_http_api_auth):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
@ -77,6 +77,13 @@ class TestRquest:
        assert res["code"] == 101, res
        assert res["message"] == "No properties were modified", res

+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = update_dataset(get_http_api_auth, dataset_id, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+

 class TestCapability:
    @pytest.mark.p3