Refa: Move HTTP API tests to top-level test directory (#8042)

### What problem does this PR solve?

Move test cases only - CI still runs tests under sdk/python

### Type of change

- [x] Refactoring
2026-01-31 15:45:08 +08:00 · 2025-06-04 13:16:17 +08:00
parent b832372c98
commit 52c814b89d
39 changed files with 7934 additions and 6 deletions
--- a/test/testcases/test_http_api/test_dataset_mangement/conftest.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/conftest.py
@ -0,0 +1,39 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+
+import pytest
+from common import batch_create_datasets, delete_datasets
+
+
+@pytest.fixture(scope="class")
+def add_datasets(api_key, request):
+    def cleanup():
+        delete_datasets(api_key, {"ids": None})
+
+    request.addfinalizer(cleanup)
+
+    return batch_create_datasets(api_key, 5)
+
+
+@pytest.fixture(scope="function")
+def add_datasets_func(api_key, request):
+    def cleanup():
+        delete_datasets(api_key, {"ids": None})
+
+    request.addfinalizer(cleanup)
+
+    return batch_create_datasets(api_key, 3)
--- a/test/testcases/test_http_api/test_dataset_mangement/test_create_dataset.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/test_create_dataset.py
@ -0,0 +1,737 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from common import create_dataset
+from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
+from hypothesis import example, given, settings
+from libs.auth import RAGFlowHttpApiAuth
+from utils import encode_avatar
+from utils.file_utils import create_image_file
+from utils.hypothesis_utils import valid_names
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 0, "`Authorization` can't be empty"),
+            (
+                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
+                109,
+                "Authentication error: API key is invalid!",
+            ),
+        ],
+        ids=["empty_auth", "invalid_api_token"],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = create_dataset(invalid_auth, {"name": "auth_test"})
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, api_key):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = create_dataset(api_key, {"name": "bad_content_type"}, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "payload, expected_message",
+        [
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
+        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
+    )
+    def test_payload_bad(self, api_key, payload, expected_message):
+        res = create_dataset(api_key, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestCapability:
+    @pytest.mark.p3
+    def test_create_dataset_1k(self, api_key):
+        for i in range(1_000):
+            payload = {"name": f"dataset_{i}"}
+            res = create_dataset(api_key, payload)
+            assert res["code"] == 0, f"Failed to create dataset {i}"
+
+    @pytest.mark.p3
+    def test_create_dataset_concurrent(self, api_key):
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(create_dataset, api_key, {"name": f"dataset_{i}"}) for i in range(100)]
+        responses = list(as_completed(futures))
+        assert all(r["code"] == 0 for r in responses), responses
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestDatasetCreate:
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20)
+    def test_name(self, api_key, name):
+        res = create_dataset(api_key, {"name": name})
+        assert res["code"] == 0, res
+        assert res["data"]["name"] == name, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            ("", "String should have at least 1 character"),
+            (" ", "String should have at least 1 character"),
+            ("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters"),
+            (0, "Input should be a valid string"),
+            (None, "Input should be a valid string"),
+        ],
+        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
+    )
+    def test_name_invalid(self, api_key, name, expected_message):
+        payload = {"name": name}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, api_key):
+        name = "duplicated_name"
+        payload = {"name": name}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 103, res
+        assert res["message"] == f"Dataset name '{name}' already exists", res
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, api_key):
+        name = "CaseInsensitive"
+        payload = {"name": name.upper()}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+
+        payload = {"name": name.lower()}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 103, res
+        assert res["message"] == f"Dataset name '{name.lower()}' already exists", res
+
+    @pytest.mark.p2
+    def test_avatar(self, api_key, tmp_path):
+        fn = create_image_file(tmp_path / "ragflow_test.png")
+        payload = {
+            "name": "avatar",
+            "avatar": f"data:image/png;base64,{encode_avatar(fn)}",
+        }
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+
+    @pytest.mark.p2
+    def test_avatar_exceeds_limit_length(self, api_key):
+        payload = {"name": "avatar_exceeds_limit_length", "avatar": "a" * 65536}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "String should have at most 65535 characters" in res["message"], res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "name, prefix, expected_message",
+        [
+            ("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("unsupported_mine_type", "invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"),
+            ("invalid_mine_type", "data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"),
+        ],
+        ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"],
+    )
+    def test_avatar_invalid_prefix(self, api_key, tmp_path, name, prefix, expected_message):
+        fn = create_image_file(tmp_path / "ragflow_test.png")
+        payload = {
+            "name": name,
+            "avatar": f"{prefix}{encode_avatar(fn)}",
+        }
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_avatar_unset(self, api_key):
+        payload = {"name": "avatar_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["avatar"] is None, res
+
+    @pytest.mark.p3
+    def test_avatar_none(self, api_key):
+        payload = {"name": "avatar_none", "avatar": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["avatar"] is None, res
+
+    @pytest.mark.p2
+    def test_description(self, api_key):
+        payload = {"name": "description", "description": "description"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["description"] == "description", res
+
+    @pytest.mark.p2
+    def test_description_exceeds_limit_length(self, api_key):
+        payload = {"name": "description_exceeds_limit_length", "description": "a" * 65536}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "String should have at most 65535 characters" in res["message"], res
+
+    @pytest.mark.p3
+    def test_description_unset(self, api_key):
+        payload = {"name": "description_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["description"] is None, res
+
+    @pytest.mark.p3
+    def test_description_none(self, api_key):
+        payload = {"name": "description_none", "description": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["description"] is None, res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("BAAI/bge-large-zh-v1.5@BAAI", "BAAI/bge-large-zh-v1.5@BAAI"),
+            ("maidalun1020/bce-embedding-base_v1@Youdao", "maidalun1020/bce-embedding-base_v1@Youdao"),
+            ("embedding-3@ZHIPU-AI", "embedding-3@ZHIPU-AI"),
+        ],
+        ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
+    )
+    def test_embedding_model(self, api_key, name, embedding_model):
+        payload = {"name": name, "embedding_model": embedding_model}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["embedding_model"] == embedding_model, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("unknown_llm_name", "unknown@ZHIPU-AI"),
+            ("unknown_llm_factory", "embedding-3@unknown"),
+            ("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen"),
+            ("tenant_no_auth", "text-embedding-3-small@OpenAI"),
+        ],
+        ids=["unknown_llm_name", "unknown_llm_factory", "tenant_no_auth_default_tenant_llm", "tenant_no_auth"],
+    )
+    def test_embedding_model_invalid(self, api_key, name, embedding_model):
+        payload = {"name": name, "embedding_model": embedding_model}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        if "tenant_no_auth" in name:
+            assert res["message"] == f"Unauthorized model: <{embedding_model}>", res
+        else:
+            assert res["message"] == f"Unsupported model: <{embedding_model}>", res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
+            ("missing_model_name", "@BAAI"),
+            ("missing_provider", "BAAI/bge-large-zh-v1.5@"),
+            ("whitespace_only_model_name", " @BAAI"),
+            ("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
+        ],
+        ids=["missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
+    )
+    def test_embedding_model_format(self, api_key, name, embedding_model):
+        payload = {"name": name, "embedding_model": embedding_model}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        if name == "missing_at":
+            assert "Embedding model identifier must follow <model_name>@<provider> format" in res["message"], res
+        else:
+            assert "Both model_name and provider must be non-empty strings" in res["message"], res
+
+    @pytest.mark.p2
+    def test_embedding_model_unset(self, api_key):
+        payload = {"name": "embedding_model_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["embedding_model"] == "BAAI/bge-large-zh-v1.5@BAAI", res
+
+    @pytest.mark.p2
+    def test_embedding_model_none(self, api_key):
+        payload = {"name": "embedding_model_none", "embedding_model": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid string" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, permission",
+        [
+            ("me", "me"),
+            ("team", "team"),
+            ("me_upercase", "ME"),
+            ("team_upercase", "TEAM"),
+            ("whitespace", " ME "),
+        ],
+        ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
+    )
+    def test_permission(self, api_key, name, permission):
+        payload = {"name": name, "permission": permission}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["permission"] == permission.lower().strip(), res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, permission",
+        [
+            ("empty", ""),
+            ("unknown", "unknown"),
+            ("type_error", list()),
+        ],
+        ids=["empty", "unknown", "type_error"],
+    )
+    def test_permission_invalid(self, api_key, name, permission):
+        payload = {"name": name, "permission": permission}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101
+        assert "Input should be 'me' or 'team'" in res["message"]
+
+    @pytest.mark.p2
+    def test_permission_unset(self, api_key):
+        payload = {"name": "permission_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["permission"] == "me", res
+
+    @pytest.mark.p3
+    def test_permission_none(self, api_key):
+        payload = {"name": "permission_none", "permission": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'me' or 'team'" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, chunk_method",
+        [
+            ("naive", "naive"),
+            ("book", "book"),
+            ("email", "email"),
+            ("laws", "laws"),
+            ("manual", "manual"),
+            ("one", "one"),
+            ("paper", "paper"),
+            ("picture", "picture"),
+            ("presentation", "presentation"),
+            ("qa", "qa"),
+            ("table", "table"),
+            ("tag", "tag"),
+        ],
+        ids=["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
+    )
+    def test_chunk_method(self, api_key, name, chunk_method):
+        payload = {"name": name, "chunk_method": chunk_method}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["chunk_method"] == chunk_method, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, chunk_method",
+        [
+            ("empty", ""),
+            ("unknown", "unknown"),
+            ("type_error", list()),
+        ],
+        ids=["empty", "unknown", "type_error"],
+    )
+    def test_chunk_method_invalid(self, api_key, name, chunk_method):
+        payload = {"name": name, "chunk_method": chunk_method}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
+
+    @pytest.mark.p2
+    def test_chunk_method_unset(self, api_key):
+        payload = {"name": "chunk_method_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["chunk_method"] == "naive", res
+
+    @pytest.mark.p3
+    def test_chunk_method_none(self, api_key):
+        payload = {"name": "chunk_method_none", "chunk_method": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, pagerank",
+        [
+            ("pagerank_min", 0),
+            ("pagerank_mid", 50),
+            ("pagerank_max", 100),
+        ],
+        ids=["min", "mid", "max"],
+    )
+    def test_pagerank(self, api_key, name, pagerank):
+        payload = {"name": name, "pagerank": pagerank}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["pagerank"] == pagerank, res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "name, pagerank, expected_message",
+        [
+            ("pagerank_min_limit", -1, "Input should be greater than or equal to 0"),
+            ("pagerank_max_limit", 101, "Input should be less than or equal to 100"),
+        ],
+        ids=["min_limit", "max_limit"],
+    )
+    def test_pagerank_invalid(self, api_key, name, pagerank, expected_message):
+        payload = {"name": name, "pagerank": pagerank}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_pagerank_unset(self, api_key):
+        payload = {"name": "pagerank_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["pagerank"] == 0, res
+
+    @pytest.mark.p3
+    def test_pagerank_none(self, api_key):
+        payload = {"name": "pagerank_unset", "pagerank": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid integer" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, parser_config",
+        [
+            ("auto_keywords_min", {"auto_keywords": 0}),
+            ("auto_keywords_mid", {"auto_keywords": 16}),
+            ("auto_keywords_max", {"auto_keywords": 32}),
+            ("auto_questions_min", {"auto_questions": 0}),
+            ("auto_questions_mid", {"auto_questions": 5}),
+            ("auto_questions_max", {"auto_questions": 10}),
+            ("chunk_token_num_min", {"chunk_token_num": 1}),
+            ("chunk_token_num_mid", {"chunk_token_num": 1024}),
+            ("chunk_token_num_max", {"chunk_token_num": 2048}),
+            ("delimiter", {"delimiter": "\n"}),
+            ("delimiter_space", {"delimiter": " "}),
+            ("html4excel_true", {"html4excel": True}),
+            ("html4excel_false", {"html4excel": False}),
+            ("layout_recognize_DeepDOC", {"layout_recognize": "DeepDOC"}),
+            ("layout_recognize_navie", {"layout_recognize": "Plain Text"}),
+            ("tag_kb_ids", {"tag_kb_ids": ["1", "2"]}),
+            ("topn_tags_min", {"topn_tags": 1}),
+            ("topn_tags_mid", {"topn_tags": 5}),
+            ("topn_tags_max", {"topn_tags": 10}),
+            ("filename_embd_weight_min", {"filename_embd_weight": 0.1}),
+            ("filename_embd_weight_mid", {"filename_embd_weight": 0.5}),
+            ("filename_embd_weight_max", {"filename_embd_weight": 1.0}),
+            ("task_page_size_min", {"task_page_size": 1}),
+            ("task_page_size_None", {"task_page_size": None}),
+            ("pages", {"pages": [[1, 100]]}),
+            ("pages_none", {"pages": None}),
+            ("graphrag_true", {"graphrag": {"use_graphrag": True}}),
+            ("graphrag_false", {"graphrag": {"use_graphrag": False}}),
+            ("graphrag_entity_types", {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}}),
+            ("graphrag_method_general", {"graphrag": {"method": "general"}}),
+            ("graphrag_method_light", {"graphrag": {"method": "light"}}),
+            ("graphrag_community_true", {"graphrag": {"community": True}}),
+            ("graphrag_community_false", {"graphrag": {"community": False}}),
+            ("graphrag_resolution_true", {"graphrag": {"resolution": True}}),
+            ("graphrag_resolution_false", {"graphrag": {"resolution": False}}),
+            ("raptor_true", {"raptor": {"use_raptor": True}}),
+            ("raptor_false", {"raptor": {"use_raptor": False}}),
+            ("raptor_prompt", {"raptor": {"prompt": "Who are you?"}}),
+            ("raptor_max_token_min", {"raptor": {"max_token": 1}}),
+            ("raptor_max_token_mid", {"raptor": {"max_token": 1024}}),
+            ("raptor_max_token_max", {"raptor": {"max_token": 2048}}),
+            ("raptor_threshold_min", {"raptor": {"threshold": 0.0}}),
+            ("raptor_threshold_mid", {"raptor": {"threshold": 0.5}}),
+            ("raptor_threshold_max", {"raptor": {"threshold": 1.0}}),
+            ("raptor_max_cluster_min", {"raptor": {"max_cluster": 1}}),
+            ("raptor_max_cluster_mid", {"raptor": {"max_cluster": 512}}),
+            ("raptor_max_cluster_max", {"raptor": {"max_cluster": 1024}}),
+            ("raptor_random_seed_min", {"raptor": {"random_seed": 0}}),
+        ],
+        ids=[
+            "auto_keywords_min",
+            "auto_keywords_mid",
+            "auto_keywords_max",
+            "auto_questions_min",
+            "auto_questions_mid",
+            "auto_questions_max",
+            "chunk_token_num_min",
+            "chunk_token_num_mid",
+            "chunk_token_num_max",
+            "delimiter",
+            "delimiter_space",
+            "html4excel_true",
+            "html4excel_false",
+            "layout_recognize_DeepDOC",
+            "layout_recognize_navie",
+            "tag_kb_ids",
+            "topn_tags_min",
+            "topn_tags_mid",
+            "topn_tags_max",
+            "filename_embd_weight_min",
+            "filename_embd_weight_mid",
+            "filename_embd_weight_max",
+            "task_page_size_min",
+            "task_page_size_None",
+            "pages",
+            "pages_none",
+            "graphrag_true",
+            "graphrag_false",
+            "graphrag_entity_types",
+            "graphrag_method_general",
+            "graphrag_method_light",
+            "graphrag_community_true",
+            "graphrag_community_false",
+            "graphrag_resolution_true",
+            "graphrag_resolution_false",
+            "raptor_true",
+            "raptor_false",
+            "raptor_prompt",
+            "raptor_max_token_min",
+            "raptor_max_token_mid",
+            "raptor_max_token_max",
+            "raptor_threshold_min",
+            "raptor_threshold_mid",
+            "raptor_threshold_max",
+            "raptor_max_cluster_min",
+            "raptor_max_cluster_mid",
+            "raptor_max_cluster_max",
+            "raptor_random_seed_min",
+        ],
+    )
+    def test_parser_config(self, api_key, name, parser_config):
+        payload = {"name": name, "parser_config": parser_config}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        for k, v in parser_config.items():
+            if isinstance(v, dict):
+                for kk, vv in v.items():
+                    assert res["data"]["parser_config"][k][kk] == vv, res
+            else:
+                assert res["data"]["parser_config"][k] == v, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, parser_config, expected_message",
+        [
+            ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"),
+            ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"),
+            ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"),
+            ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"),
+            ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
+            ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
+            ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"),
+            ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
+            ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"),
+            ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
+            ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"),
+            ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"),
+            ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
+            ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
+            ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
+            ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"),
+            ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"),
+            ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"),
+            ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
+            ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"),
+            ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
+            ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
+            ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"),
+            ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"),
+            ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
+            ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
+            ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+            ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
+            ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
+            ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
+            ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
+            ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
+            ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
+            ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
+            ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+            ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
+        ],
+        ids=[
+            "auto_keywords_min_limit",
+            "auto_keywords_max_limit",
+            "auto_keywords_float_not_allowed",
+            "auto_keywords_type_invalid",
+            "auto_questions_min_limit",
+            "auto_questions_max_limit",
+            "auto_questions_float_not_allowed",
+            "auto_questions_type_invalid",
+            "chunk_token_num_min_limit",
+            "chunk_token_num_max_limit",
+            "chunk_token_num_float_not_allowed",
+            "chunk_token_num_type_invalid",
+            "delimiter_empty",
+            "html4excel_type_invalid",
+            "tag_kb_ids_not_list",
+            "tag_kb_ids_int_in_list",
+            "topn_tags_min_limit",
+            "topn_tags_max_limit",
+            "topn_tags_float_not_allowed",
+            "topn_tags_type_invalid",
+            "filename_embd_weight_min_limit",
+            "filename_embd_weight_max_limit",
+            "filename_embd_weight_type_invalid",
+            "task_page_size_min_limit",
+            "task_page_size_float_not_allowed",
+            "task_page_size_type_invalid",
+            "pages_not_list",
+            "pages_not_list_in_list",
+            "pages_not_int_list",
+            "graphrag_type_invalid",
+            "graphrag_entity_types_not_list",
+            "graphrag_entity_types_not_str_in_list",
+            "graphrag_method_unknown",
+            "graphrag_method_none",
+            "graphrag_community_type_invalid",
+            "graphrag_resolution_type_invalid",
+            "raptor_type_invalid",
+            "raptor_prompt_empty",
+            "raptor_prompt_space",
+            "raptor_max_token_min_limit",
+            "raptor_max_token_max_limit",
+            "raptor_max_token_float_not_allowed",
+            "raptor_max_token_type_invalid",
+            "raptor_threshold_min_limit",
+            "raptor_threshold_max_limit",
+            "raptor_threshold_type_invalid",
+            "raptor_max_cluster_min_limit",
+            "raptor_max_cluster_max_limit",
+            "raptor_max_cluster_float_not_allowed",
+            "raptor_max_cluster_type_invalid",
+            "raptor_random_seed_min_limit",
+            "raptor_random_seed_float_not_allowed",
+            "raptor_random_seed_type_invalid",
+            "parser_config_type_invalid",
+        ],
+    )
+    def test_parser_config_invalid(self, api_key, name, parser_config, expected_message):
+        payload = {"name": name, "parser_config": parser_config}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_parser_config_empty(self, api_key):
+        payload = {"name": "parser_config_empty", "parser_config": {}}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res
+
+    @pytest.mark.p2
+    def test_parser_config_unset(self, api_key):
+        payload = {"name": "parser_config_unset"}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res
+
+    @pytest.mark.p3
+    def test_parser_config_none(self, api_key):
+        payload = {"name": "parser_config_none", "parser_config": None}
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": "\\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {"name": "id", "id": "id"},
+            {"name": "tenant_id", "tenant_id": "e57c1966f99211efb41e9e45646e0111"},
+            {"name": "created_by", "created_by": "created_by"},
+            {"name": "create_date", "create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"name": "create_time", "create_time": 1741671443322},
+            {"name": "update_date", "update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"name": "update_time", "update_time": 1741671443339},
+            {"name": "document_count", "document_count": 1},
+            {"name": "chunk_count", "chunk_count": 1},
+            {"name": "token_num", "token_num": 1},
+            {"name": "status", "status": "1"},
+            {"name": "unknown_field", "unknown_field": "unknown_field"},
+        ],
+    )
+    def test_unsupported_field(self, api_key, payload):
+        res = create_dataset(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
--- a/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py
@ -0,0 +1,219 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from common import (
+    INVALID_API_TOKEN,
+    batch_create_datasets,
+    delete_datasets,
+    list_datasets,
+)
+from libs.auth import RAGFlowHttpApiAuth
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 0, "`Authorization` can't be empty"),
+            (
+                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
+                109,
+                "Authentication error: API key is invalid!",
+            ),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = delete_datasets(invalid_auth)
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, api_key):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(api_key, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "payload, expected_message",
+        [
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
+        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
+    )
+    def test_payload_bad(self, api_key, payload, expected_message):
+        res = delete_datasets(api_key, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+    @pytest.mark.p3
+    def test_payload_unset(self, api_key):
+        res = delete_datasets(api_key, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, api_key):
+        ids = batch_create_datasets(api_key, 1_000)
+        res = delete_datasets(api_key, {"ids": ids})
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p3
+    def test_concurrent_deletion(self, api_key):
+        dataset_num = 1_000
+        ids = batch_create_datasets(api_key, dataset_num)
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(delete_datasets, api_key, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
+        responses = [f.result() for f in futures]
+        assert all(r["code"] == 0 for r in responses), responses
+
+
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, expected_code, expected_message, remaining",
+        [
+            (lambda r: {"ids": r[:1]}, 0, "", 2),
+            (lambda r: {"ids": r}, 0, "", 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, api_key, add_datasets_func, func, expected_code, expected_message, remaining):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == expected_code, res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == remaining, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, api_key):
+        payload = {"ids": []}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, api_key):
+        payload = {"ids": None}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, api_key):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p3
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid1(self, api_key):
+        payload = {"ids": [uuid.uuid4().hex]}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, api_key):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func",
+        [
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
+        ],
+    )
+    def test_ids_partial_invalid(self, api_key, add_datasets_func, func):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_ids_duplicate(self, api_key, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_repeated_delete(self, api_key, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 0, res
+
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, api_key):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(api_key, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+        res = list_datasets(api_key)
+        assert len(res["data"]) == 1, res
--- a/test/testcases/test_http_api/test_dataset_mangement/test_list_datasets.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/test_list_datasets.py
@ -0,0 +1,339 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from common import INVALID_API_TOKEN, list_datasets
+from libs.auth import RAGFlowHttpApiAuth
+from utils import is_sorted
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 0, "`Authorization` can't be empty"),
+            (
+                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
+                109,
+                "Authentication error: API key is invalid!",
+            ),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = list_datasets(invalid_auth)
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_concurrent_list(self, api_key):
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(list_datasets, api_key) for i in range(100)]
+        responses = [f.result() for f in futures]
+        assert all(r["code"] == 0 for r in responses), responses
+
+
+@pytest.mark.usefixtures("add_datasets")
+class TestDatasetsList:
+    @pytest.mark.p1
+    def test_params_unset(self, api_key):
+        res = list_datasets(api_key, None)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p2
+    def test_params_empty(self, api_key):
+        res = list_datasets(api_key, {})
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page": 2, "page_size": 2}, 2),
+            ({"page": 3, "page_size": 2}, 1),
+            ({"page": 4, "page_size": 2}, 0),
+            ({"page": "2", "page_size": 2}, 2),
+            ({"page": 1, "page_size": 10}, 5),
+        ],
+        ids=["normal_middle_page", "normal_last_partial_page", "beyond_max_page", "string_page_number", "full_data_single_page"],
+    )
+    def test_page(self, api_key, params, expected_page_size):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == expected_page_size, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_code, expected_message",
+        [
+            ({"page": 0}, 101, "Input should be greater than or equal to 1"),
+            ({"page": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
+        ],
+        ids=["page_0", "page_a"],
+    )
+    def test_page_invalid(self, api_key, params, expected_code, expected_message):
+        res = list_datasets(api_key, params=params)
+        assert res["code"] == expected_code, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_page_none(self, api_key):
+        params = {"page": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page_size": 1}, 1),
+            ({"page_size": 3}, 3),
+            ({"page_size": 5}, 5),
+            ({"page_size": 6}, 5),
+            ({"page_size": "1"}, 1),
+        ],
+        ids=["min_valid_page_size", "medium_page_size", "page_size_equals_total", "page_size_exceeds_total", "string_type_page_size"],
+    )
+    def test_page_size(self, api_key, params, expected_page_size):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == expected_page_size, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_code, expected_message",
+        [
+            ({"page_size": 0}, 101, "Input should be greater than or equal to 1"),
+            ({"page_size": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
+        ],
+    )
+    def test_page_size_invalid(self, api_key, params, expected_code, expected_message):
+        res = list_datasets(api_key, params)
+        assert res["code"] == expected_code, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_page_size_none(self, api_key):
+        params = {"page_size": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, assertions",
+        [
+            ({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))),
+            ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))),
+            ({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))),
+        ],
+        ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
+    )
+    def test_orderby(self, api_key, params, assertions):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        if callable(assertions):
+            assert assertions(res), res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"orderby": ""},
+            {"orderby": "unknown"},
+        ],
+        ids=["empty", "unknown"],
+    )
+    def test_orderby_invalid(self, api_key, params):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Input should be 'create_time' or 'update_time'" in res["message"], res
+
+    @pytest.mark.p3
+    def test_orderby_none(self, api_key):
+        params = {"order_by": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert is_sorted(res["data"], "create_time", True), res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, assertions",
+        [
+            ({"desc": True}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"desc": False}, lambda r: (is_sorted(r["data"], "create_time", False))),
+            ({"desc": "true"}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"desc": "false"}, lambda r: (is_sorted(r["data"], "create_time", False))),
+            ({"desc": 1}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"desc": 0}, lambda r: (is_sorted(r["data"], "create_time", False))),
+            ({"desc": "yes"}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"desc": "no"}, lambda r: (is_sorted(r["data"], "create_time", False))),
+            ({"desc": "y"}, lambda r: (is_sorted(r["data"], "create_time", True))),
+            ({"desc": "n"}, lambda r: (is_sorted(r["data"], "create_time", False))),
+        ],
+        ids=["desc=True", "desc=False", "desc=true", "desc=false", "desc=1", "desc=0", "desc=yes", "desc=no", "desc=y", "desc=n"],
+    )
+    def test_desc(self, api_key, params, assertions):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        if callable(assertions):
+            assert assertions(res), res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"desc": 3.14},
+            {"desc": "unknown"},
+        ],
+        ids=["empty", "unknown"],
+    )
+    def test_desc_invalid(self, api_key, params):
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Input should be a valid boolean, unable to interpret input" in res["message"], res
+
+    @pytest.mark.p3
+    def test_desc_none(self, api_key):
+        params = {"desc": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert is_sorted(res["data"], "create_time", True), res
+
+    @pytest.mark.p1
+    def test_name(self, api_key):
+        params = {"name": "dataset_1"}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 1, res
+        assert res["data"][0]["name"] == "dataset_1", res
+
+    @pytest.mark.p2
+    def test_name_wrong(self, api_key):
+        params = {"name": "wrong name"}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    def test_name_empty(self, api_key):
+        params = {"name": ""}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p2
+    def test_name_none(self, api_key):
+        params = {"name": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p1
+    def test_id(self, api_key, add_datasets):
+        dataset_ids = add_datasets
+        params = {"id": dataset_ids[0]}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0
+        assert len(res["data"]) == 1
+        assert res["data"][0]["id"] == dataset_ids[0]
+
+    @pytest.mark.p2
+    def test_id_not_uuid(self, api_key):
+        params = {"id": "not_uuid"}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p2
+    def test_id_not_uuid1(self, api_key):
+        params = {"id": uuid.uuid4().hex}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p2
+    def test_id_wrong_uuid(self, api_key):
+        params = {"id": "d94a8dc02c9711f0930f7fbc369eab6d"}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    def test_id_empty(self, api_key):
+        params = {"id": ""}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p2
+    def test_id_none(self, api_key):
+        params = {"id": None}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == 5, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func, name, expected_num",
+        [
+            (lambda r: r[0], "dataset_0", 1),
+            (lambda r: r[0], "dataset_1", 0),
+        ],
+        ids=["name_and_id_match", "name_and_id_mismatch"],
+    )
+    def test_name_and_id(self, api_key, add_datasets, func, name, expected_num):
+        dataset_ids = add_datasets
+        if callable(func):
+            params = {"id": func(dataset_ids), "name": name}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]) == expected_num, res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "dataset_id, name",
+        [
+            (lambda r: r[0], "wrong_name"),
+            (uuid.uuid1().hex, "dataset_0"),
+        ],
+        ids=["name", "id"],
+    )
+    def test_name_and_id_wrong(self, api_key, add_datasets, dataset_id, name):
+        dataset_ids = add_datasets
+        if callable(dataset_id):
+            params = {"id": dataset_id(dataset_ids), "name": name}
+        else:
+            params = {"id": dataset_id, "name": name}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    def test_field_unsupported(self, api_key):
+        params = {"unknown_field": "unknown_field"}
+        res = list_datasets(api_key, params)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
--- a/test/testcases/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/test_update_dataset.py
@ -0,0 +1,820 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, list_datasets, update_dataset
+from hypothesis import HealthCheck, example, given, settings
+from libs.auth import RAGFlowHttpApiAuth
+from utils import encode_avatar
+from utils.file_utils import create_image_file
+from utils.hypothesis_utils import valid_names
+
+# TODO: Missing scenario for updating embedding_model with chunk_count != 0
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 0, "`Authorization` can't be empty"),
+            (
+                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
+                109,
+                "Authentication error: API key is invalid!",
+            ),
+        ],
+        ids=["empty_auth", "invalid_api_token"],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = update_dataset(invalid_auth, "dataset_id")
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestRquest:
+    @pytest.mark.p3
+    def test_bad_content_type(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        BAD_CONTENT_TYPE = "text/xml"
+        res = update_dataset(api_key, dataset_id, {"name": "bad_content_type"}, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "payload, expected_message",
+        [
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
+        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
+    )
+    def test_payload_bad(self, api_key, add_dataset_func, payload, expected_message):
+        dataset_id = add_dataset_func
+        res = update_dataset(api_key, dataset_id, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+    @pytest.mark.p2
+    def test_payload_empty(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = update_dataset(api_key, dataset_id, {})
+        assert res["code"] == 101, res
+        assert res["message"] == "No properties were modified", res
+
+    @pytest.mark.p3
+    def test_payload_unset(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = update_dataset(api_key, dataset_id, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_update_dateset_concurrent(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(update_dataset, api_key, dataset_id, {"name": f"dataset_{i}"}) for i in range(100)]
+        responses = [f.result() for f in futures]
+        assert all(r["code"] == 0 for r in responses), responses
+
+
+class TestDatasetUpdate:
+    @pytest.mark.p3
+    def test_dataset_id_not_uuid(self, api_key):
+        payload = {"name": "not uuid"}
+        res = update_dataset(api_key, "not_uuid", payload)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p3
+    def test_dataset_id_not_uuid1(self, api_key):
+        payload = {"name": "not uuid1"}
+        res = update_dataset(api_key, uuid.uuid4().hex, payload)
+        assert res["code"] == 101, res
+        assert "Invalid UUID1 format" in res["message"], res
+
+    @pytest.mark.p3
+    def test_dataset_id_wrong_uuid(self, api_key):
+        payload = {"name": "wrong uuid"}
+        res = update_dataset(api_key, "d94a8dc02c9711f0930f7fbc369eab6d", payload)
+        assert res["code"] == 108, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
+    def test_name(self, api_key, add_dataset_func, name):
+        dataset_id = add_dataset_func
+        payload = {"name": name}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["name"] == name, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            ("", "String should have at least 1 character"),
+            (" ", "String should have at least 1 character"),
+            ("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters"),
+            (0, "Input should be a valid string"),
+            (None, "Input should be a valid string"),
+        ],
+        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
+    )
+    def test_name_invalid(self, api_key, add_dataset_func, name, expected_message):
+        dataset_id = add_dataset_func
+        payload = {"name": name}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, api_key, add_datasets_func):
+        dataset_ids = add_datasets_func[0]
+        name = "dataset_1"
+        payload = {"name": name}
+        res = update_dataset(api_key, dataset_ids, payload)
+        assert res["code"] == 102, res
+        assert res["message"] == f"Dataset name '{name}' already exists", res
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, api_key, add_datasets_func):
+        dataset_id = add_datasets_func[0]
+        name = "DATASET_1"
+        payload = {"name": name}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 102, res
+        assert res["message"] == f"Dataset name '{name}' already exists", res
+
+    @pytest.mark.p2
+    def test_avatar(self, api_key, add_dataset_func, tmp_path):
+        dataset_id = add_dataset_func
+        fn = create_image_file(tmp_path / "ragflow_test.png")
+        payload = {
+            "avatar": f"data:image/png;base64,{encode_avatar(fn)}",
+        }
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["avatar"] == f"data:image/png;base64,{encode_avatar(fn)}", res
+
+    @pytest.mark.p2
+    def test_avatar_exceeds_limit_length(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"avatar": "a" * 65536}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "String should have at most 65535 characters" in res["message"], res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "avatar_prefix, expected_message",
+        [
+            ("", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"),
+            ("data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"),
+        ],
+        ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"],
+    )
+    def test_avatar_invalid_prefix(self, api_key, add_dataset_func, tmp_path, avatar_prefix, expected_message):
+        dataset_id = add_dataset_func
+        fn = create_image_file(tmp_path / "ragflow_test.png")
+        payload = {"avatar": f"{avatar_prefix}{encode_avatar(fn)}"}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_avatar_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"avatar": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["avatar"] is None, res
+
+    @pytest.mark.p2
+    def test_description(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"description": "description"}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0
+
+        res = list_datasets(api_key, {"id": dataset_id})
+        assert res["code"] == 0, res
+        assert res["data"][0]["description"] == "description"
+
+    @pytest.mark.p2
+    def test_description_exceeds_limit_length(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"description": "a" * 65536}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "String should have at most 65535 characters" in res["message"], res
+
+    @pytest.mark.p3
+    def test_description_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"description": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key, {"id": dataset_id})
+        assert res["code"] == 0, res
+        assert res["data"][0]["description"] is None
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "embedding_model",
+        [
+            "BAAI/bge-large-zh-v1.5@BAAI",
+            "maidalun1020/bce-embedding-base_v1@Youdao",
+            "embedding-3@ZHIPU-AI",
+        ],
+        ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
+    )
+    def test_embedding_model(self, api_key, add_dataset_func, embedding_model):
+        dataset_id = add_dataset_func
+        payload = {"embedding_model": embedding_model}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["embedding_model"] == embedding_model, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("unknown_llm_name", "unknown@ZHIPU-AI"),
+            ("unknown_llm_factory", "embedding-3@unknown"),
+            ("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen"),
+            ("tenant_no_auth", "text-embedding-3-small@OpenAI"),
+        ],
+        ids=["unknown_llm_name", "unknown_llm_factory", "tenant_no_auth_default_tenant_llm", "tenant_no_auth"],
+    )
+    def test_embedding_model_invalid(self, api_key, add_dataset_func, name, embedding_model):
+        dataset_id = add_dataset_func
+        payload = {"name": name, "embedding_model": embedding_model}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        if "tenant_no_auth" in name:
+            assert res["message"] == f"Unauthorized model: <{embedding_model}>", res
+        else:
+            assert res["message"] == f"Unsupported model: <{embedding_model}>", res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
+            ("missing_model_name", "@BAAI"),
+            ("missing_provider", "BAAI/bge-large-zh-v1.5@"),
+            ("whitespace_only_model_name", " @BAAI"),
+            ("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
+        ],
+        ids=["missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
+    )
+    def test_embedding_model_format(self, api_key, add_dataset_func, name, embedding_model):
+        dataset_id = add_dataset_func
+        payload = {"name": name, "embedding_model": embedding_model}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        if name == "missing_at":
+            assert "Embedding model identifier must follow <model_name>@<provider> format" in res["message"], res
+        else:
+            assert "Both model_name and provider must be non-empty strings" in res["message"], res
+
+    @pytest.mark.p2
+    def test_embedding_model_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"embedding_model": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid string" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "permission",
+        [
+            "me",
+            "team",
+            "ME",
+            "TEAM",
+            " ME ",
+        ],
+        ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
+    )
+    def test_permission(self, api_key, add_dataset_func, permission):
+        dataset_id = add_dataset_func
+        payload = {"permission": permission}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["permission"] == permission.lower().strip(), res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "permission",
+        [
+            "",
+            "unknown",
+            list(),
+        ],
+        ids=["empty", "unknown", "type_error"],
+    )
+    def test_permission_invalid(self, api_key, add_dataset_func, permission):
+        dataset_id = add_dataset_func
+        payload = {"permission": permission}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101
+        assert "Input should be 'me' or 'team'" in res["message"]
+
+    @pytest.mark.p3
+    def test_permission_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"permission": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'me' or 'team'" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "chunk_method",
+        [
+            "naive",
+            "book",
+            "email",
+            "laws",
+            "manual",
+            "one",
+            "paper",
+            "picture",
+            "presentation",
+            "qa",
+            "table",
+            "tag",
+        ],
+        ids=["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
+    )
+    def test_chunk_method(self, api_key, add_dataset_func, chunk_method):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": chunk_method}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["chunk_method"] == chunk_method, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "chunk_method",
+        [
+            "",
+            "unknown",
+            list(),
+        ],
+        ids=["empty", "unknown", "type_error"],
+    )
+    def test_chunk_method_invalid(self, api_key, add_dataset_func, chunk_method):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": chunk_method}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
+
+    @pytest.mark.p3
+    def test_chunk_method_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
+    def test_pagerank(self, api_key, add_dataset_func, pagerank):
+        dataset_id = add_dataset_func
+        payload = {"pagerank": pagerank}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0
+
+        res = list_datasets(api_key, {"id": dataset_id})
+        assert res["code"] == 0, res
+        assert res["data"][0]["pagerank"] == pagerank
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "pagerank, expected_message",
+        [
+            (-1, "Input should be greater than or equal to 0"),
+            (101, "Input should be less than or equal to 100"),
+        ],
+        ids=["min_limit", "max_limit"],
+    )
+    def test_pagerank_invalid(self, api_key, add_dataset_func, pagerank, expected_message):
+        dataset_id = add_dataset_func
+        payload = {"pagerank": pagerank}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_pagerank_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"pagerank": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid integer" in res["message"], res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "parser_config",
+        [
+            {"auto_keywords": 0},
+            {"auto_keywords": 16},
+            {"auto_keywords": 32},
+            {"auto_questions": 0},
+            {"auto_questions": 5},
+            {"auto_questions": 10},
+            {"chunk_token_num": 1},
+            {"chunk_token_num": 1024},
+            {"chunk_token_num": 2048},
+            {"delimiter": "\n"},
+            {"delimiter": " "},
+            {"html4excel": True},
+            {"html4excel": False},
+            {"layout_recognize": "DeepDOC"},
+            {"layout_recognize": "Plain Text"},
+            {"tag_kb_ids": ["1", "2"]},
+            {"topn_tags": 1},
+            {"topn_tags": 5},
+            {"topn_tags": 10},
+            {"filename_embd_weight": 0.1},
+            {"filename_embd_weight": 0.5},
+            {"filename_embd_weight": 1.0},
+            {"task_page_size": 1},
+            {"task_page_size": None},
+            {"pages": [[1, 100]]},
+            {"pages": None},
+            {"graphrag": {"use_graphrag": True}},
+            {"graphrag": {"use_graphrag": False}},
+            {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}},
+            {"graphrag": {"method": "general"}},
+            {"graphrag": {"method": "light"}},
+            {"graphrag": {"community": True}},
+            {"graphrag": {"community": False}},
+            {"graphrag": {"resolution": True}},
+            {"graphrag": {"resolution": False}},
+            {"raptor": {"use_raptor": True}},
+            {"raptor": {"use_raptor": False}},
+            {"raptor": {"prompt": "Who are you?"}},
+            {"raptor": {"max_token": 1}},
+            {"raptor": {"max_token": 1024}},
+            {"raptor": {"max_token": 2048}},
+            {"raptor": {"threshold": 0.0}},
+            {"raptor": {"threshold": 0.5}},
+            {"raptor": {"threshold": 1.0}},
+            {"raptor": {"max_cluster": 1}},
+            {"raptor": {"max_cluster": 512}},
+            {"raptor": {"max_cluster": 1024}},
+            {"raptor": {"random_seed": 0}},
+        ],
+        ids=[
+            "auto_keywords_min",
+            "auto_keywords_mid",
+            "auto_keywords_max",
+            "auto_questions_min",
+            "auto_questions_mid",
+            "auto_questions_max",
+            "chunk_token_num_min",
+            "chunk_token_num_mid",
+            "chunk_token_num_max",
+            "delimiter",
+            "delimiter_space",
+            "html4excel_true",
+            "html4excel_false",
+            "layout_recognize_DeepDOC",
+            "layout_recognize_navie",
+            "tag_kb_ids",
+            "topn_tags_min",
+            "topn_tags_mid",
+            "topn_tags_max",
+            "filename_embd_weight_min",
+            "filename_embd_weight_mid",
+            "filename_embd_weight_max",
+            "task_page_size_min",
+            "task_page_size_None",
+            "pages",
+            "pages_none",
+            "graphrag_true",
+            "graphrag_false",
+            "graphrag_entity_types",
+            "graphrag_method_general",
+            "graphrag_method_light",
+            "graphrag_community_true",
+            "graphrag_community_false",
+            "graphrag_resolution_true",
+            "graphrag_resolution_false",
+            "raptor_true",
+            "raptor_false",
+            "raptor_prompt",
+            "raptor_max_token_min",
+            "raptor_max_token_mid",
+            "raptor_max_token_max",
+            "raptor_threshold_min",
+            "raptor_threshold_mid",
+            "raptor_threshold_max",
+            "raptor_max_cluster_min",
+            "raptor_max_cluster_mid",
+            "raptor_max_cluster_max",
+            "raptor_random_seed_min",
+        ],
+    )
+    def test_parser_config(self, api_key, add_dataset_func, parser_config):
+        dataset_id = add_dataset_func
+        payload = {"parser_config": parser_config}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        for k, v in parser_config.items():
+            if isinstance(v, dict):
+                for kk, vv in v.items():
+                    assert res["data"][0]["parser_config"][k][kk] == vv, res
+            else:
+                assert res["data"][0]["parser_config"][k] == v, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "parser_config, expected_message",
+        [
+            ({"auto_keywords": -1}, "Input should be greater than or equal to 0"),
+            ({"auto_keywords": 33}, "Input should be less than or equal to 32"),
+            ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"auto_questions": -1}, "Input should be greater than or equal to 0"),
+            ({"auto_questions": 11}, "Input should be less than or equal to 10"),
+            ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
+            ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
+            ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"delimiter": ""}, "String should have at least 1 character"),
+            ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
+            ({"tag_kb_ids": "1,2"}, "Input should be a valid list"),
+            ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
+            ({"topn_tags": 0}, "Input should be greater than or equal to 1"),
+            ({"topn_tags": 11}, "Input should be less than or equal to 10"),
+            ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
+            ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
+            ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
+            ({"task_page_size": 0}, "Input should be greater than or equal to 1"),
+            ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"pages": "1,2"}, "Input should be a valid list"),
+            ({"pages": ["1,2"]}, "Input should be a valid list"),
+            ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
+            ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"),
+            ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
+            ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
+            ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+            ({"raptor": {"prompt": ""}}, "String should have at least 1 character"),
+            ({"raptor": {"prompt": " "}}, "String should have at least 1 character"),
+            ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
+            ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
+            ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
+            ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
+            ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
+            ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
+            ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
+            ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
+            ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
+            ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+            ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+            ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
+        ],
+        ids=[
+            "auto_keywords_min_limit",
+            "auto_keywords_max_limit",
+            "auto_keywords_float_not_allowed",
+            "auto_keywords_type_invalid",
+            "auto_questions_min_limit",
+            "auto_questions_max_limit",
+            "auto_questions_float_not_allowed",
+            "auto_questions_type_invalid",
+            "chunk_token_num_min_limit",
+            "chunk_token_num_max_limit",
+            "chunk_token_num_float_not_allowed",
+            "chunk_token_num_type_invalid",
+            "delimiter_empty",
+            "html4excel_type_invalid",
+            "tag_kb_ids_not_list",
+            "tag_kb_ids_int_in_list",
+            "topn_tags_min_limit",
+            "topn_tags_max_limit",
+            "topn_tags_float_not_allowed",
+            "topn_tags_type_invalid",
+            "filename_embd_weight_min_limit",
+            "filename_embd_weight_max_limit",
+            "filename_embd_weight_type_invalid",
+            "task_page_size_min_limit",
+            "task_page_size_float_not_allowed",
+            "task_page_size_type_invalid",
+            "pages_not_list",
+            "pages_not_list_in_list",
+            "pages_not_int_list",
+            "graphrag_type_invalid",
+            "graphrag_entity_types_not_list",
+            "graphrag_entity_types_not_str_in_list",
+            "graphrag_method_unknown",
+            "graphrag_method_none",
+            "graphrag_community_type_invalid",
+            "graphrag_resolution_type_invalid",
+            "raptor_type_invalid",
+            "raptor_prompt_empty",
+            "raptor_prompt_space",
+            "raptor_max_token_min_limit",
+            "raptor_max_token_max_limit",
+            "raptor_max_token_float_not_allowed",
+            "raptor_max_token_type_invalid",
+            "raptor_threshold_min_limit",
+            "raptor_threshold_max_limit",
+            "raptor_threshold_type_invalid",
+            "raptor_max_cluster_min_limit",
+            "raptor_max_cluster_max_limit",
+            "raptor_max_cluster_float_not_allowed",
+            "raptor_max_cluster_type_invalid",
+            "raptor_random_seed_min_limit",
+            "raptor_random_seed_float_not_allowed",
+            "raptor_random_seed_type_invalid",
+            "parser_config_type_invalid",
+        ],
+    )
+    def test_parser_config_invalid(self, api_key, add_dataset_func, parser_config, expected_message):
+        dataset_id = add_dataset_func
+        payload = {"parser_config": parser_config}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_parser_config_empty(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"parser_config": {}}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res
+
+    @pytest.mark.p3
+    def test_parser_config_none(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"parser_config": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key, {"id": dataset_id})
+        assert res["code"] == 0, res
+        assert res["data"][0]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res
+
+    @pytest.mark.p3
+    def test_parser_config_empty_with_chunk_method_change(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": "qa", "parser_config": {}}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res
+
+    @pytest.mark.p3
+    def test_parser_config_unset_with_chunk_method_change(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": "qa"}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res
+
+    @pytest.mark.p3
+    def test_parser_config_none_with_chunk_method_change(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        payload = {"chunk_method": "qa", "parser_config": None}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key, {"id": dataset_id})
+        assert res["code"] == 0, res
+        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {"id": "id"},
+            {"tenant_id": "e57c1966f99211efb41e9e45646e0111"},
+            {"created_by": "created_by"},
+            {"create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"create_time": 1741671443322},
+            {"update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"update_time": 1741671443339},
+            {"document_count": 1},
+            {"chunk_count": 1},
+            {"token_num": 1},
+            {"status": "1"},
+            {"unknown_field": "unknown_field"},
+        ],
+    )
+    def test_field_unsupported(self, api_key, add_dataset_func, payload):
+        dataset_id = add_dataset_func
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+    @pytest.mark.p2
+    def test_field_unset(self, api_key, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        original_data = res["data"][0]
+
+        payload = {"name": "default_unset"}
+        res = update_dataset(api_key, dataset_id, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(api_key)
+        assert res["code"] == 0, res
+        assert res["data"][0]["avatar"] == original_data["avatar"], res
+        assert res["data"][0]["description"] == original_data["description"], res
+        assert res["data"][0]["embedding_model"] == original_data["embedding_model"], res
+        assert res["data"][0]["permission"] == original_data["permission"], res
+        assert res["data"][0]["chunk_method"] == original_data["chunk_method"], res
+        assert res["data"][0]["pagerank"] == original_data["pagerank"], res
+        assert res["data"][0]["parser_config"] == {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }, res