Test: add sdk Dataset test cases (#8077)

### What problem does this PR solve?

Add SDK dataset test cases.

### Type of change

- [x] Add test case
2026-02-01 16:15:07 +08:00 · 2025-06-05 13:20:28 +08:00
parent 91804f28f1
commit ee52000870
11 changed files with 2041 additions and 24 deletions
--- a/test/testcases/test_sdk_api/test_dataset_mangement/conftest.py
+++ b/test/testcases/test_sdk_api/test_dataset_mangement/conftest.py
@ -0,0 +1,39 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+
+import pytest
+from common import batch_create_datasets
+
+
+@pytest.fixture(scope="class")
+def add_datasets(client, request):
+    """Create five datasets once per test class and return the result.
+
+    The finalizer calls ``client.delete_datasets(ids=None)`` when the class
+    finishes (presumably this removes every dataset -- confirm against the SDK).
+    """
+    # The finalizer is registered before the datasets are created.
+    request.addfinalizer(lambda: client.delete_datasets(ids=None))
+    return batch_create_datasets(client, 5)
+
+
+@pytest.fixture(scope="function")
+def add_datasets_func(client, request):
+    """Create three datasets for a single test and return the result.
+
+    The finalizer calls ``client.delete_datasets(ids=None)`` after the test
+    (presumably this removes every dataset -- confirm against the SDK).
+    """
+
+    def _delete_datasets():
+        client.delete_datasets(ids=None)
+
+    # The finalizer is registered before the datasets are created.
+    request.addfinalizer(_delete_datasets)
+    return batch_create_datasets(client, 3)
--- a/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py
+++ b/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py
@ -0,0 +1,698 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from operator import attrgetter
+
+import pytest
+from configs import DATASET_NAME_LIMIT, HOST_ADDRESS, INVALID_API_TOKEN
+from hypothesis import example, given, settings
+from ragflow_sdk import DataSet, RAGFlow
+from utils import encode_avatar
+from utils.file_utils import create_image_file
+from utils.hypothesis_utils import valid_names
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestAuthorization:
+    """Dataset creation with a missing or invalid API key."""
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_message",
+        [
+            pytest.param(None, "Authentication error: API key is invalid!", id="empty_auth"),
+            pytest.param(INVALID_API_TOKEN, "Authentication error: API key is invalid!", id="invalid_api_token"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_message):
+        """Creating a dataset with a bad key raises the expected authentication error."""
+        unauthorized_client = RAGFlow(invalid_auth, HOST_ADDRESS)
+        with pytest.raises(Exception) as excinfo:
+            unauthorized_client.create_dataset(name="auth_test")
+        assert str(excinfo.value) == expected_message
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestCapability:
+    """Volume and concurrency checks for dataset creation."""
+
+    @pytest.mark.p3
+    def test_create_dataset_1k(self, client):
+        """Create 1,000 datasets sequentially and check that all of them are listed."""
+        count = 1_000
+        for i in range(count):
+            client.create_dataset(name=f"dataset_{i}")
+        assert len(client.list_datasets(page_size=2000)) == count
+
+    @pytest.mark.p3
+    def test_create_dataset_concurrent(self, client):
+        """Create datasets from five worker threads and require every call to succeed."""
+        count = 100
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(client.create_dataset, name=f"dataset_{i}") for i in range(count)]
+            # result() re-raises any exception raised in the worker, so a failed
+            # creation fails the test instead of merely counting as "completed".
+            datasets = [future.result() for future in as_completed(futures)]
+        assert len(datasets) == count, datasets
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestDatasetCreate:
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20)
+    def test_name(self, client, name):
+        """A generated valid name (plus an explicit 128-character one) is stored unchanged."""
+        created = client.create_dataset(name=name)
+        assert created.name == name, str(created)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            pytest.param("", "String should have at least 1 character", id="empty_name"),
+            pytest.param(" ", "String should have at least 1 character", id="space_name"),
+            pytest.param("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters", id="too_long_name"),
+            pytest.param(0, "not instance of", id="invalid_name"),
+            pytest.param(None, "not instance of", id="None_name"),
+        ],
+    )
+    def test_name_invalid(self, client, name, expected_message):
+        """Empty, blank, over-long and non-string names are rejected with the expected message."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name)
+        error_text = str(excinfo.value)
+        assert expected_message in error_text, error_text
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, client):
+        """Creating a second dataset with the same name raises an 'already exists' error."""
+        name = "duplicated_name"
+        client.create_dataset(name=name)
+
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name)
+        error_text = str(excinfo.value)
+        assert error_text == f"Dataset name '{name}' already exists", error_text
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, client):
+        """A name differing only in letter case from an existing one is treated as a duplicate."""
+        name = "CaseInsensitive"
+        upper_name, lower_name = name.upper(), name.lower()
+        client.create_dataset(name=upper_name)
+
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=lower_name)
+        error_text = str(excinfo.value)
+        assert error_text == f"Dataset name '{lower_name}' already exists", error_text
+
+    @pytest.mark.p2
+    def test_avatar(self, client, tmp_path):
+        """A PNG avatar sent as a base64 data URI is accepted (no exception is raised)."""
+        image_path = create_image_file(tmp_path / "ragflow_test.png")
+        avatar = f"data:image/png;base64,{encode_avatar(image_path)}"
+        client.create_dataset(name="avatar", avatar=avatar)
+
+    @pytest.mark.p2
+    def test_avatar_exceeds_limit_length(self, client):
+        """An avatar string of 65536 characters is rejected as too long."""
+        oversized_avatar = "a" * 65536
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name="avatar_exceeds_limit_length", avatar=oversized_avatar)
+        error_text = str(excinfo.value)
+        assert "String should have at most 65535 characters" in error_text, error_text
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "name, prefix, expected_message",
+        [
+            pytest.param("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>", id="empty_prefix"),
+            pytest.param("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>", id="missing_comma"),
+            pytest.param("unsupported_mine_type", "invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'", id="unsupported_mine_type"),
+            pytest.param("invalid_mine_type", "data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']", id="invalid_mine_type"),
+        ],
+    )
+    def test_avatar_invalid_prefix(self, client, tmp_path, name, prefix, expected_message):
+        """An avatar whose data-URI prefix is missing or malformed is rejected with the expected message."""
+        image_path = create_image_file(tmp_path / "ragflow_test.png")
+        avatar = f"{prefix}{encode_avatar(image_path)}"
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, avatar=avatar)
+        error_text = str(excinfo.value)
+        assert expected_message in error_text, error_text
+
+    @pytest.mark.p3
+    def test_avatar_unset(self, client):
+        """A dataset created without an avatar reports ``avatar`` as None."""
+        created = client.create_dataset(name="avatar_unset")
+        assert created.avatar is None, str(created)
+
+    @pytest.mark.p2
+    def test_description(self, client):
+        """A supplied description is returned unchanged on the created dataset."""
+        created = client.create_dataset(name="description", description="description")
+        assert created.description == "description", str(created)
+
+    @pytest.mark.p2
+    def test_description_exceeds_limit_length(self, client):
+        """A description of 65536 characters is rejected as too long."""
+        oversized_description = "a" * 65536
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name="description_exceeds_limit_length", description=oversized_description)
+        error_text = str(excinfo.value)
+        assert "String should have at most 65535 characters" in error_text, error_text
+
+    @pytest.mark.p3
+    def test_description_unset(self, client):
+        """A dataset created without a description reports ``description`` as None."""
+        created = client.create_dataset(name="description_unset")
+        assert created.description is None, str(created)
+
+    @pytest.mark.p3
+    def test_description_none(self, client):
+        """Passing ``description=None`` explicitly is accepted and stored as None."""
+        created = client.create_dataset(name="description_none", description=None)
+        assert created.description is None, str(created)
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            pytest.param("BAAI/bge-large-zh-v1.5@BAAI", "BAAI/bge-large-zh-v1.5@BAAI", id="builtin_baai"),
+            pytest.param("maidalun1020/bce-embedding-base_v1@Youdao", "maidalun1020/bce-embedding-base_v1@Youdao", id="builtin_youdao"),
+            pytest.param("embedding-3@ZHIPU-AI", "embedding-3@ZHIPU-AI", id="tenant_zhipu"),
+        ],
+    )
+    def test_embedding_model(self, client, name, embedding_model):
+        """Each listed embedding model is accepted and echoed back on the dataset."""
+        created = client.create_dataset(name=name, embedding_model=embedding_model)
+        assert created.embedding_model == embedding_model, str(created)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            pytest.param("unknown_llm_name", "unknown@ZHIPU-AI", id="unknown_llm_name"),
+            pytest.param("unknown_llm_factory", "embedding-3@unknown", id="unknown_llm_factory"),
+            pytest.param("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen", id="tenant_no_auth_default_tenant_llm"),
+            pytest.param("tenant_no_auth", "text-embedding-3-small@OpenAI", id="tenant_no_auth"),
+        ],
+    )
+    def test_embedding_model_invalid(self, client, name, embedding_model):
+        """Unknown models raise 'Unsupported model'; the tenant_no_auth cases raise 'Unauthorized model'."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, embedding_model=embedding_model)
+        # The case name selects which of the two error messages is expected.
+        error_kind = "Unauthorized" if "tenant_no_auth" in name else "Unsupported"
+        error_text = str(excinfo.value)
+        assert error_text == f"{error_kind} model: <{embedding_model}>", error_text
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            pytest.param("missing_at", "BAAI/bge-large-zh-v1.5BAAI", id="missing_at"),
+            pytest.param("missing_model_name", "@BAAI", id="empty_model_name"),
+            pytest.param("missing_provider", "BAAI/bge-large-zh-v1.5@", id="empty_provider"),
+            pytest.param("whitespace_only_model_name", " @BAAI", id="whitespace_only_model_name"),
+            pytest.param("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ ", id="whitespace_only_provider"),
+        ],
+    )
+    def test_embedding_model_format(self, client, name, embedding_model):
+        """Identifiers that do not match ``<model_name>@<provider>`` are rejected."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, embedding_model=embedding_model)
+        # Only the case without an "@" gets the format error; the others have an empty half.
+        if name == "missing_at":
+            expected_message = "Embedding model identifier must follow <model_name>@<provider> format"
+        else:
+            expected_message = "Both model_name and provider must be non-empty strings"
+        error_text = str(excinfo.value)
+        assert expected_message in error_text, error_text
+
+    @pytest.mark.p2
+    def test_embedding_model_unset(self, client):
+        """Without an embedding model the dataset is expected to use BAAI/bge-large-zh-v1.5@BAAI."""
+        created = client.create_dataset(name="embedding_model_unset")
+        assert created.embedding_model == "BAAI/bge-large-zh-v1.5@BAAI", str(created)
+
+    @pytest.mark.p2
+    def test_embedding_model_none(self, client):
+        """Passing ``embedding_model=None`` explicitly is rejected as not a valid string."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name="embedding_model_none", embedding_model=None)
+        error_text = str(excinfo.value)
+        assert "Input should be a valid string" in error_text, error_text
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, permission",
+        [
+            pytest.param("me", "me", id="me"),
+            pytest.param("team", "team", id="team"),
+            pytest.param("me_upercase", "ME", id="me_upercase"),
+            pytest.param("team_upercase", "TEAM", id="team_upercase"),
+            pytest.param("whitespace", " ME ", id="whitespace"),
+        ],
+    )
+    def test_permission(self, client, name, permission):
+        """Permission values are accepted regardless of case or surrounding whitespace."""
+        created = client.create_dataset(name=name, permission=permission)
+        # The stored value is expected to be the lower-cased, stripped form of the input.
+        normalized = permission.lower().strip()
+        assert created.permission == normalized, str(created)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, permission",
+        [
+            ("empty", ""),
+            ("unknown", "unknown"),
+        ],
+        ids=["empty", "unknown"],
+    )
+    def test_permission_invalid(self, client, name, permission):
+        """An empty or unknown permission value is rejected with the 'me' or 'team' hint."""
+        payload = {"name": name, "permission": permission}
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(**payload)
+        # Include the actual error text on failure, as the other tests in this class do.
+        assert "Input should be 'me' or 'team'" in str(excinfo.value), str(excinfo.value)
+
+    @pytest.mark.p2
+    def test_permission_unset(self, client):
+        """Without a permission argument the dataset is expected to report "me"."""
+        created = client.create_dataset(name="permission_unset")
+        assert created.permission == "me", str(created)
+
+    @pytest.mark.p3
+    def test_permission_none(self, client):
+        """Passing ``permission=None`` explicitly raises a 'not instance of' error."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name="permission_none", permission=None)
+        error_text = str(excinfo.value)
+        assert "not instance of" in error_text, error_text
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, chunk_method",
+        [
+            pytest.param("naive", "naive", id="naive"),
+            pytest.param("book", "book", id="book"),
+            pytest.param("email", "email", id="email"),
+            pytest.param("laws", "laws", id="laws"),
+            pytest.param("manual", "manual", id="manual"),
+            pytest.param("one", "one", id="one"),
+            pytest.param("paper", "paper", id="paper"),
+            pytest.param("picture", "picture", id="picture"),
+            pytest.param("presentation", "presentation", id="presentation"),
+            pytest.param("qa", "qa", id="qa"),
+            pytest.param("table", "table", id="table"),
+            pytest.param("tag", "tag", id="tag"),
+        ],
+    )
+    def test_chunk_method(self, client, name, chunk_method):
+        """Each listed chunk method is accepted and echoed back on the dataset."""
+        created = client.create_dataset(name=name, chunk_method=chunk_method)
+        assert created.chunk_method == chunk_method, str(created)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, chunk_method",
+        [
+            pytest.param("empty", "", id="empty"),
+            pytest.param("unknown", "unknown", id="unknown"),
+        ],
+    )
+    def test_chunk_method_invalid(self, client, name, chunk_method):
+        """An empty or unknown chunk method is rejected with the list of allowed values."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, chunk_method=chunk_method)
+        error_text = str(excinfo.value)
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in error_text, error_text
+
+    @pytest.mark.p2
+    def test_chunk_method_unset(self, client):
+        """Without a chunk method the dataset is expected to report "naive"."""
+        created = client.create_dataset(name="chunk_method_unset")
+        assert created.chunk_method == "naive", str(created)
+
+    @pytest.mark.p3
+    def test_chunk_method_none(self, client):
+        """Passing ``chunk_method=None`` explicitly raises a 'not instance of' error."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name="chunk_method_none", chunk_method=None)
+        error_text = str(excinfo.value)
+        assert "not instance of" in error_text, error_text
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, pagerank",
+        [
+            pytest.param("pagerank_min", 0, id="min"),
+            pytest.param("pagerank_mid", 50, id="mid"),
+            pytest.param("pagerank_max", 100, id="max"),
+        ],
+    )
+    def test_pagerank(self, client, name, pagerank):
+        """Pagerank values 0, 50 and 100 are accepted and echoed back on the dataset."""
+        created = client.create_dataset(name=name, pagerank=pagerank)
+        assert created.pagerank == pagerank, str(created)
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "name, pagerank, expected_message",
+        [
+            pytest.param("pagerank_min_limit", -1, "Input should be greater than or equal to 0", id="min_limit"),
+            pytest.param("pagerank_max_limit", 101, "Input should be less than or equal to 100", id="max_limit"),
+        ],
+    )
+    def test_pagerank_invalid(self, client, name, pagerank, expected_message):
+        """Pagerank values of -1 and 101 are rejected with the matching bound message."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, pagerank=pagerank)
+        error_text = str(excinfo.value)
+        assert expected_message in error_text, error_text
+
+    @pytest.mark.p3
+    def test_pagerank_unset(self, client):
+        """Without a pagerank argument the dataset is expected to report 0."""
+        created = client.create_dataset(name="pagerank_unset")
+        assert created.pagerank == 0, str(created)
+
+    @pytest.mark.p3
+    def test_pagerank_none(self, client):
+        """Passing ``pagerank=None`` explicitly raises a 'not instance of' error."""
+        # Use a name unique to this test; "pagerank_unset" belongs to test_pagerank_unset.
+        payload = {"name": "pagerank_none", "pagerank": None}
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(**payload)
+        assert "not instance of" in str(excinfo.value), str(excinfo.value)
+
+    # Valid parser_config cases as (dataset name, parser_config) pairs. The
+    # dataset name doubles as the pytest id of the case.
+    _PARSER_CONFIG_CASES = [
+        ("auto_keywords_min", {"auto_keywords": 0}),
+        ("auto_keywords_mid", {"auto_keywords": 16}),
+        ("auto_keywords_max", {"auto_keywords": 32}),
+        ("auto_questions_min", {"auto_questions": 0}),
+        ("auto_questions_mid", {"auto_questions": 5}),
+        ("auto_questions_max", {"auto_questions": 10}),
+        ("chunk_token_num_min", {"chunk_token_num": 1}),
+        ("chunk_token_num_mid", {"chunk_token_num": 1024}),
+        ("chunk_token_num_max", {"chunk_token_num": 2048}),
+        ("delimiter", {"delimiter": "\n"}),
+        ("delimiter_space", {"delimiter": " "}),
+        ("html4excel_true", {"html4excel": True}),
+        ("html4excel_false", {"html4excel": False}),
+        ("layout_recognize_DeepDOC", {"layout_recognize": "DeepDOC"}),
+        ("layout_recognize_navie", {"layout_recognize": "Plain Text"}),
+        ("tag_kb_ids", {"tag_kb_ids": ["1", "2"]}),
+        ("topn_tags_min", {"topn_tags": 1}),
+        ("topn_tags_mid", {"topn_tags": 5}),
+        ("topn_tags_max", {"topn_tags": 10}),
+        ("filename_embd_weight_min", {"filename_embd_weight": 0.1}),
+        ("filename_embd_weight_mid", {"filename_embd_weight": 0.5}),
+        ("filename_embd_weight_max", {"filename_embd_weight": 1.0}),
+        ("task_page_size_min", {"task_page_size": 1}),
+        ("task_page_size_None", {"task_page_size": None}),
+        ("pages", {"pages": [[1, 100]]}),
+        ("pages_none", {"pages": None}),
+        ("graphrag_true", {"graphrag": {"use_graphrag": True}}),
+        ("graphrag_false", {"graphrag": {"use_graphrag": False}}),
+        ("graphrag_entity_types", {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}}),
+        ("graphrag_method_general", {"graphrag": {"method": "general"}}),
+        ("graphrag_method_light", {"graphrag": {"method": "light"}}),
+        ("graphrag_community_true", {"graphrag": {"community": True}}),
+        ("graphrag_community_false", {"graphrag": {"community": False}}),
+        ("graphrag_resolution_true", {"graphrag": {"resolution": True}}),
+        ("graphrag_resolution_false", {"graphrag": {"resolution": False}}),
+        ("raptor_true", {"raptor": {"use_raptor": True}}),
+        ("raptor_false", {"raptor": {"use_raptor": False}}),
+        ("raptor_prompt", {"raptor": {"prompt": "Who are you?"}}),
+        ("raptor_max_token_min", {"raptor": {"max_token": 1}}),
+        ("raptor_max_token_mid", {"raptor": {"max_token": 1024}}),
+        ("raptor_max_token_max", {"raptor": {"max_token": 2048}}),
+        ("raptor_threshold_min", {"raptor": {"threshold": 0.0}}),
+        ("raptor_threshold_mid", {"raptor": {"threshold": 0.5}}),
+        ("raptor_threshold_max", {"raptor": {"threshold": 1.0}}),
+        ("raptor_max_cluster_min", {"raptor": {"max_cluster": 1}}),
+        ("raptor_max_cluster_mid", {"raptor": {"max_cluster": 512}}),
+        ("raptor_max_cluster_max", {"raptor": {"max_cluster": 1024}}),
+        ("raptor_random_seed_min", {"raptor": {"random_seed": 0}}),
+    ]
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "name, parser_config",
+        _PARSER_CONFIG_CASES,
+        ids=[case[0] for case in _PARSER_CONFIG_CASES],
+    )
+    def test_parser_config(self, client, name, parser_config):
+        """Every supplied parser_config field is echoed back on the created dataset."""
+        config_object = DataSet.ParserConfig(client, parser_config)
+        created = client.create_dataset(name=name, parser_config=config_object)
+        for key, value in parser_config.items():
+            # Nested sections are compared field by field through dotted attribute paths.
+            if isinstance(value, dict):
+                expected_by_path = {f"{key}.{sub_key}": sub_value for sub_key, sub_value in value.items()}
+            else:
+                expected_by_path = {key: value}
+            for path, expected in expected_by_path.items():
+                assert attrgetter(path)(created.parser_config) == expected, str(created)
+
+    # Invalid parser_config cases as (dataset name, parser_config, expected
+    # error fragment) triples. The dataset name doubles as the pytest id.
+    _PARSER_CONFIG_INVALID_CASES = [
+        ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"),
+        ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"),
+        ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+        ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"),
+        ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"),
+        ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+        ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
+        ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
+        ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+        ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"),
+        ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
+        ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"),
+        ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
+        ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"),
+        ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"),
+        ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+        ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
+        ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
+        ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
+        ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"),
+        ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
+        ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"),
+        ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"),
+        ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+        ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
+        ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "Input should be a valid string"),
+        ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
+        ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
+        ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+        ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+        ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
+        ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"),
+        ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"),
+        ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
+        ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
+        ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+        ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
+        ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
+        ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
+        ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
+        ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
+        ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+        ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
+        ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
+        ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
+        ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
+    ]
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, parser_config, expected_message",
+        _PARSER_CONFIG_INVALID_CASES,
+        ids=[case[0] for case in _PARSER_CONFIG_INVALID_CASES],
+    )
+    def test_parser_config_invalid(self, client, name, parser_config, expected_message):
+        """An out-of-range or wrongly typed parser_config field is rejected with the expected message."""
+        parser_config_o = DataSet.ParserConfig(client, parser_config)
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(name=name, parser_config=parser_config_o)
+        error_text = str(excinfo.value)
+        assert expected_message in error_text, error_text
+
+    @pytest.mark.p2
+    def test_parser_config_empty(self, client):
+        """Create a dataset with an empty ParserConfig and compare the stored config with the expected one."""
+        # Settings the created dataset is expected to report back.
+        expected_settings = {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }
+        expected_config = DataSet.ParserConfig(client, expected_settings)
+
+        created = client.create_dataset(
+            name="parser_config_empty",
+            parser_config=DataSet.ParserConfig(client, {}),
+        )
+        # The configs are compared through their string form.
+        assert str(created.parser_config) == str(expected_config), str(created)
+
+    @pytest.mark.p2
+    def test_parser_config_unset(self, client):
+        """Create a dataset without passing parser_config and compare the stored config with the expected one."""
+        # Settings the created dataset is expected to report back.
+        expected_settings = {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }
+        expected_config = DataSet.ParserConfig(client, expected_settings)
+
+        created = client.create_dataset(name="parser_config_unset")
+        # The configs are compared through their string form.
+        assert str(created.parser_config) == str(expected_config), str(created)
+
+    @pytest.mark.p3
+    def test_parser_config_none(self, client):
+        """Create a dataset with parser_config=None and compare the stored config with the expected one."""
+        excepted_value = DataSet.ParserConfig(
+            client,
+            {
+                "chunk_token_num": 128,
+                "delimiter": r"\n",
+                "html4excel": False,
+                "layout_recognize": "DeepDOC",
+                "raptor": {"use_raptor": False},
+            },
+        )
+        # Use a name unique to this test; it previously reused "parser_config_empty",
+        # the name already taken by test_parser_config_empty.
+        payload = {"name": "parser_config_none", "parser_config": None}
+        dataset = client.create_dataset(**payload)
+        # The configs are compared through their string form.
+        assert str(dataset.parser_config) == str(excepted_value), str(dataset)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {"name": "id", "id": "id"},
+            {"name": "tenant_id", "tenant_id": "e57c1966f99211efb41e9e45646e0111"},
+            {"name": "created_by", "created_by": "created_by"},
+            {"name": "create_date", "create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"name": "create_time", "create_time": 1741671443322},
+            {"name": "update_date", "update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"name": "update_time", "update_time": 1741671443339},
+            {"name": "document_count", "document_count": 1},
+            {"name": "chunk_count", "chunk_count": 1},
+            {"name": "token_num", "token_num": 1},
+            {"name": "status", "status": "1"},
+            {"name": "unknown_field", "unknown_field": "unknown_field"},
+        ],
+    )
+    def test_unsupported_field(self, client, payload):
+        """Each payload carries one extra keyword next to `name`; create_dataset must reject it."""
+        with pytest.raises(Exception) as excinfo:
+            client.create_dataset(**payload)
+
+        raised_text = str(excinfo.value)
+        assert "got an unexpected keyword argument" in raised_text, raised_text
--- a/test/testcases/test_sdk_api/test_dataset_mangement/test_delete_datasets.py
+++ b/test/testcases/test_sdk_api/test_dataset_mangement/test_delete_datasets.py
@ -0,0 +1,178 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from common import batch_create_datasets
+from configs import HOST_ADDRESS, INVALID_API_TOKEN
+from ragflow_sdk import RAGFlow
+
+
+class TestAuthorization:
+    """Authentication failures when calling delete_datasets through the SDK."""
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_message",
+        [
+            (None, "Authentication error: API key is invalid!"),
+            (INVALID_API_TOKEN, "Authentication error: API key is invalid!"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_message):
+        """A client built with a missing or invalid API key must raise exactly the expected message."""
+        unauthorized_client = RAGFlow(invalid_auth, HOST_ADDRESS)
+        with pytest.raises(Exception) as excinfo:
+            unauthorized_client.delete_datasets()
+
+        raised_text = str(excinfo.value)
+        assert raised_text == expected_message
+
+
+class TestCapability:
+    """Bulk and concurrent deletion of datasets through the SDK."""
+
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, client):
+        """Create 1,000 datasets, delete them in one call, and expect none to be listed afterwards."""
+        datasets = batch_create_datasets(client, 1_000)
+        client.delete_datasets(**{"ids": [dataset.id for dataset in datasets]})
+
+        datasets = client.list_datasets()
+        assert len(datasets) == 0, datasets
+
+    @pytest.mark.p3
+    def test_concurrent_deletion(self, client):
+        """Delete 1,000 datasets one id per call on a 5-worker pool and expect none to be listed afterwards."""
+        count = 1_000
+        datasets = batch_create_datasets(client, count)
+        # Build the id list once instead of rebuilding it for every submitted call.
+        dataset_ids = [dataset.id for dataset in datasets]
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(client.delete_datasets, **{"ids": dataset_ids[i : i + 1]}) for i in range(count)]
+        # Leaving the `with` block waits for every submitted call, so all futures are done here.
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+
+        # Surface exceptions raised inside worker threads; counting futures alone cannot detect them.
+        failures = [future.exception() for future in futures if future.exception() is not None]
+        assert not failures, failures
+
+        datasets = client.list_datasets()
+        assert len(datasets) == 0, datasets
+
+
+class TestDatasetsDelete:
+    """Exercises delete_datasets with different `ids` payloads and counts what list_datasets still returns."""
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, remaining",
+        [
+            (lambda created: {"ids": created[:1]}, 2),
+            (lambda created: {"ids": created}, 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, client, add_datasets_func, func, remaining):
+        """Delete one or all of the fixture datasets and check how many are still listed."""
+        created_ids = [dataset.id for dataset in add_datasets_func]
+        if callable(func):
+            payload = func(created_ids)
+        client.delete_datasets(**payload)
+
+        leftover = client.list_datasets()
+        assert len(leftover) == remaining, str(leftover)
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, client):
+        """An empty id list is expected to delete nothing: one dataset must still be listed."""
+        client.delete_datasets(ids=[])
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 1, str(leftover)
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, client):
+        """Passing ids=None is expected to leave no datasets listed."""
+        client.delete_datasets(ids=None)
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 0, str(leftover)
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, client):
+        """An id that is not a UUID must be rejected and one dataset must still be listed."""
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(ids=["not_uuid"])
+        raised_text = str(excinfo.value)
+        assert "Invalid UUID1 format" in raised_text, raised_text
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 1, str(leftover)
+
+    @pytest.mark.p3
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid1(self, client):
+        """A version-4 UUID hex string must be rejected with the UUID1 format error."""
+        uuid4_hex = uuid.uuid4().hex
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(ids=[uuid4_hex])
+        raised_text = str(excinfo.value)
+        assert "Invalid UUID1 format" in raised_text, raised_text
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, client):
+        """A hard-coded id that is not among the created datasets must raise a permission error and delete nothing."""
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d"])
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 1, str(leftover)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func",
+        [
+            lambda created: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + created},
+            lambda created: {"ids": created[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + created[1:3]},
+            lambda created: {"ids": created + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
+        ],
+    )
+    def test_ids_partial_invalid(self, client, add_datasets_func, func):
+        """Mix the hard-coded foreign id into the created ids at the front, middle and end; all three datasets must survive."""
+        created_ids = [dataset.id for dataset in add_datasets_func]
+        if callable(func):
+            payload = func(created_ids)
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(**payload)
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 3, str(leftover)
+
+    @pytest.mark.p2
+    def test_ids_duplicate(self, client, add_datasets_func):
+        """Sending every id twice must raise a duplicate-id error and leave all three datasets listed."""
+        created_ids = [dataset.id for dataset in add_datasets_func]
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(ids=created_ids + created_ids)
+        raised_text = str(excinfo.value)
+        assert "Duplicate ids:" in raised_text, raised_text
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 3, str(leftover)
+
+    @pytest.mark.p2
+    def test_repeated_delete(self, client, add_datasets_func):
+        """Deleting the same ids a second time must raise a permission error."""
+        created_ids = [dataset.id for dataset in add_datasets_func]
+        # The first call is expected to succeed.
+        client.delete_datasets(ids=created_ids)
+
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(ids=created_ids)
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, client):
+        """An unknown keyword must be rejected and one dataset must still be listed."""
+        with pytest.raises(Exception) as excinfo:
+            client.delete_datasets(unknown_field="unknown_field")
+        raised_text = str(excinfo.value)
+        assert "got an unexpected keyword argument 'unknown_field'" in raised_text, raised_text
+
+        leftover = client.list_datasets()
+        assert len(leftover) == 1, str(leftover)
--- a/test/testcases/test_sdk_api/test_dataset_mangement/test_list_datasets.py
+++ b/test/testcases/test_sdk_api/test_dataset_mangement/test_list_datasets.py
@ -0,0 +1,313 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from configs import HOST_ADDRESS, INVALID_API_TOKEN
+from ragflow_sdk import RAGFlow
+
+
+class TestAuthorization:
+    """Authentication failures when calling list_datasets through the SDK."""
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_message",
+        [
+            (None, "Authentication error: API key is invalid!"),
+            (INVALID_API_TOKEN, "Authentication error: API key is invalid!"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_message):
+        """A client built with a missing or invalid API key must raise an error containing the expected message."""
+        unauthorized_client = RAGFlow(invalid_auth, HOST_ADDRESS)
+        with pytest.raises(Exception) as excinfo:
+            unauthorized_client.list_datasets()
+
+        raised_text = str(excinfo.value)
+        assert expected_message in raised_text
+
+
+class TestCapability:
+    """Concurrency check for list_datasets."""
+
+    @pytest.mark.p3
+    def test_concurrent_list(self, client):
+        """Submit 100 list_datasets calls on a 5-worker pool and check that every future comes back."""
+        total = 100
+        with ThreadPoolExecutor(max_workers=5) as pool:
+            pending = [pool.submit(client.list_datasets) for _ in range(total)]
+        finished = list(as_completed(pending))
+        assert len(finished) == total, finished
+
+
+@pytest.mark.usefixtures("add_datasets")
+class TestDatasetsList:
+    """Exercises list_datasets parameters; the count assertions expect five datasets to exist."""
+
+    @pytest.mark.p1
+    def test_params_unset(self, client):
+        """Calling without arguments is expected to return all five datasets."""
+        listed = client.list_datasets()
+        assert len(listed) == 5, str(listed)
+
+    @pytest.mark.p2
+    def test_params_empty(self, client):
+        """Unpacking an empty dict of parameters is expected to return all five datasets."""
+        no_params = {}
+        listed = client.list_datasets(**no_params)
+        assert len(listed) == 5, str(listed)
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page": 2, "page_size": 2}, 2),
+            ({"page": 3, "page_size": 2}, 1),
+            ({"page": 4, "page_size": 2}, 0),
+            ({"page": 1, "page_size": 10}, 5),
+        ],
+        ids=["normal_middle_page", "normal_last_partial_page", "beyond_max_page", "full_data_single_page"],
+    )
+    def test_page(self, client, params, expected_page_size):
+        """Each page/page_size combination must return the expected number of datasets."""
+        listed = client.list_datasets(**params)
+        assert len(listed) == expected_page_size, str(listed)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_message",
+        [
+            ({"page": 0}, "Input should be greater than or equal to 1"),
+            ({"page": "a"}, "not instance of"),
+        ],
+        ids=["page_0", "page_a"],
+    )
+    def test_page_invalid(self, client, params, expected_message):
+        """A zero or non-integer page must raise an error containing the expected message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(**params)
+        raised_text = str(excinfo.value)
+        assert expected_message in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_page_none(self, client):
+        """page=None must be rejected with a type error message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(page=None)
+        raised_text = str(excinfo.value)
+        assert "not instance of" in raised_text, raised_text
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page_size": 1}, 1),
+            ({"page_size": 3}, 3),
+            ({"page_size": 5}, 5),
+            ({"page_size": 6}, 5),
+        ],
+        ids=["min_valid_page_size", "medium_page_size", "page_size_equals_total", "page_size_exceeds_total"],
+    )
+    def test_page_size(self, client, params, expected_page_size):
+        """Each page_size must return the expected number of datasets."""
+        listed = client.list_datasets(**params)
+        assert len(listed) == expected_page_size, str(listed)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_message",
+        [
+            ({"page_size": 0}, "Input should be greater than or equal to 1"),
+            ({"page_size": "a"}, "not instance of"),
+        ],
+    )
+    def test_page_size_invalid(self, client, params, expected_message):
+        """A zero or non-integer page_size must raise an error containing the expected message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(**params)
+        raised_text = str(excinfo.value)
+        assert expected_message in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_page_size_none(self, client):
+        """page_size=None must be rejected with a type error message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(page_size=None)
+        raised_text = str(excinfo.value)
+        assert "not instance of" in raised_text, raised_text
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"orderby": "create_time"},
+            {"orderby": "update_time"},
+            {"orderby": "CREATE_TIME"},
+            {"orderby": "UPDATE_TIME"},
+            {"orderby": " create_time "},
+        ],
+        ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
+    )
+    def test_orderby(self, client, params):
+        """Each listed orderby value must be accepted; the test only checks that no exception is raised."""
+        client.list_datasets(**params)
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"orderby": ""},
+            {"orderby": "unknown"},
+        ],
+        ids=["empty", "unknown"],
+    )
+    def test_orderby_invalid(self, client, params):
+        """An empty or unknown orderby must be rejected."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(**params)
+        raised_text = str(excinfo.value)
+        assert "Input should be 'create_time' or 'update_time'" in raised_text, raised_text
+
+    @pytest.mark.p3
+    def test_orderby_none(self, client):
+        """orderby=None must be rejected with a type error message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(orderby=None)
+        raised_text = str(excinfo.value)
+        assert "not instance of" in raised_text, raised_text
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"desc": True},
+            {"desc": False},
+        ],
+        ids=["desc=True", "desc=False"],
+    )
+    def test_desc(self, client, params):
+        """Both boolean desc values must be accepted; the test only checks that no exception is raised."""
+        client.list_datasets(**params)
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "params",
+        [
+            {"desc": 3.14},
+            {"desc": "unknown"},
+        ],
+        ids=["float_value", "invalid_string"],
+    )
+    def test_desc_invalid(self, client, params):
+        """A float or string desc must be rejected with a type error message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(**params)
+        raised_text = str(excinfo.value)
+        assert "not instance of" in raised_text, raised_text
+
+    @pytest.mark.p3
+    def test_desc_none(self, client):
+        """desc=None must be rejected with a type error message."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(desc=None)
+        raised_text = str(excinfo.value)
+        assert "not instance of" in raised_text, raised_text
+
+    @pytest.mark.p1
+    def test_name(self, client):
+        """Filtering by the name "dataset_1" must return exactly that dataset."""
+        listed = client.list_datasets(name="dataset_1")
+        assert len(listed) == 1, str(listed)
+        assert listed[0].name == "dataset_1", str(listed)
+
+    @pytest.mark.p2
+    def test_name_wrong(self, client):
+        """Filtering by a name that matches nothing must raise a permission error."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(name="wrong name")
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_name_empty(self, client):
+        """An empty name is expected to return all five datasets."""
+        listed = client.list_datasets(name="")
+        assert len(listed) == 5, str(listed)
+
+    @pytest.mark.p2
+    def test_name_none(self, client):
+        """name=None is expected to return all five datasets."""
+        listed = client.list_datasets(name=None)
+        assert len(listed) == 5, str(listed)
+
+    @pytest.mark.p1
+    def test_id(self, client, add_datasets):
+        """Filtering by the first fixture dataset's id must return exactly that dataset."""
+        fixture_ids = [dataset.id for dataset in add_datasets]
+        wanted_id = fixture_ids[0]
+        listed = client.list_datasets(id=wanted_id)
+        assert len(listed) == 1, str(listed)
+        assert listed[0].id == wanted_id, str(listed)
+
+    @pytest.mark.p2
+    def test_id_not_uuid(self, client):
+        """An id that is not a UUID must be rejected with the UUID1 format error."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(id="not_uuid")
+        raised_text = str(excinfo.value)
+        assert "Invalid UUID1 format" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_id_not_uuid1(self, client):
+        """A version-4 UUID hex string must be rejected with the UUID1 format error."""
+        uuid4_hex = uuid.uuid4().hex
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(id=uuid4_hex)
+        raised_text = str(excinfo.value)
+        assert "Invalid UUID1 format" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_id_wrong_uuid(self, client):
+        """A hard-coded id that is not among the created datasets must raise a permission error."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(id="d94a8dc02c9711f0930f7fbc369eab6d")
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_id_empty(self, client):
+        """An empty id must be rejected with the UUID1 format error."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(id="")
+        raised_text = str(excinfo.value)
+        assert "Invalid UUID1 format" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_id_none(self, client):
+        """id=None is expected to return all five datasets."""
+        listed = client.list_datasets(id=None)
+        assert len(listed) == 5, str(listed)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func, name, expected_num",
+        [
+            (lambda created: created[0].id, "dataset_0", 1),
+            (lambda created: created[0].id, "dataset_1", 0),
+        ],
+        ids=["name_and_id_match", "name_and_id_mismatch"],
+    )
+    def test_name_and_id(self, client, add_datasets, func, name, expected_num):
+        """Combine the first dataset's id with a matching and a non-matching name and check the result count."""
+        if callable(func):
+            params = {"id": func(add_datasets), "name": name}
+        listed = client.list_datasets(**params)
+        assert len(listed) == expected_num, str(listed)
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "dataset_id, name",
+        [
+            (lambda created: created[0].id, "wrong_name"),
+            (uuid.uuid1().hex, "dataset_0"),
+        ],
+        ids=["name", "id"],
+    )
+    def test_name_and_id_wrong(self, client, add_datasets, dataset_id, name):
+        """A wrong name or a freshly generated UUID1 id must raise a permission error."""
+        # The id is either given directly or derived from the fixture datasets.
+        resolved_id = dataset_id(add_datasets) if callable(dataset_id) else dataset_id
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(id=resolved_id, name=name)
+        raised_text = str(excinfo.value)
+        assert "lacks permission for dataset" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_field_unsupported(self, client):
+        """An unknown keyword must be rejected."""
+        with pytest.raises(Exception) as excinfo:
+            client.list_datasets(unknown_field="unknown_field")
+        raised_text = str(excinfo.value)
+        assert "got an unexpected keyword argument" in raised_text, raised_text
--- a/test/testcases/test_sdk_api/test_dataset_mangement/test_update_dataset.py
+++ b/test/testcases/test_sdk_api/test_dataset_mangement/test_update_dataset.py
@ -0,0 +1,724 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from operator import attrgetter
+
+import pytest
+from configs import DATASET_NAME_LIMIT
+from hypothesis import HealthCheck, example, given, settings
+from ragflow_sdk import DataSet
+from utils import encode_avatar
+from utils.file_utils import create_image_file
+from utils.hypothesis_utils import valid_names
+
+
+class TestRquest:
+    """Request-level checks for DataSet.update."""
+
+    @pytest.mark.p2
+    def test_payload_empty(self, add_dataset_func):
+        """Updating with an empty dict must raise an error saying nothing was modified."""
+        target = add_dataset_func
+        with pytest.raises(Exception) as excinfo:
+            target.update({})
+
+        raised_text = str(excinfo.value)
+        assert "No properties were modified" in raised_text, raised_text
+
+
+class TestCapability:
+    """Concurrency check for DataSet.update."""
+
+    @pytest.mark.p3
+    def test_update_dateset_concurrent(self, add_dataset_func):
+        """Submit 100 rename calls for the same dataset on a 5-worker pool and check that every future comes back."""
+        target = add_dataset_func
+        total = 100
+        with ThreadPoolExecutor(max_workers=5) as pool:
+            pending = [pool.submit(target.update, {"name": f"dataset_{i}"}) for i in range(total)]
+        finished = list(as_completed(pending))
+        assert len(finished) == total, finished
+
+
+class TestDatasetUpdate:
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
+    def test_name(self, client, add_dataset_func, name):
+        """Rename the dataset to generated names and check the local object and a re-fetched copy."""
+        target = add_dataset_func
+        target.update({"name": name})
+        assert target.name == name, str(target)
+
+        # Fetch the dataset again by its new name.
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.name == name, str(fetched)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            ("", "String should have at least 1 character"),
+            (" ", "String should have at least 1 character"),
+            ("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters"),
+            (0, "Input should be a valid string"),
+            (None, "Input should be a valid string"),
+        ],
+        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
+    )
+    def test_name_invalid(self, add_dataset_func, name, expected_message):
+        """Empty, blank, over-long and non-string names must raise an error containing the expected message."""
+        target = add_dataset_func
+        with pytest.raises(Exception) as excinfo:
+            target.update({"name": name})
+
+        raised_text = str(excinfo.value)
+        assert expected_message in raised_text, raised_text
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, add_datasets_func):
+        """Renaming the first dataset to "dataset_1" must raise an already-exists error."""
+        fixture_datasets = add_datasets_func
+        taken_name = "dataset_1"
+        with pytest.raises(Exception) as excinfo:
+            fixture_datasets[0].update({"name": taken_name})
+
+        raised_text = str(excinfo.value)
+        assert f"Dataset name '{taken_name}' already exists" in raised_text, raised_text
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, add_datasets_func):
+        """Renaming the first dataset to the upper-case "DATASET_1" must also raise an already-exists error."""
+        first_dataset = add_datasets_func[0]
+        upper_name = "DATASET_1"
+        with pytest.raises(Exception) as excinfo:
+            first_dataset.update({"name": upper_name})
+
+        raised_text = str(excinfo.value)
+        assert f"Dataset name '{upper_name}' already exists" in raised_text, raised_text
+
+    @pytest.mark.p2
+    def test_avatar(self, client, add_dataset_func, tmp_path):
+        """Set a base64 PNG data URI as avatar and check the local object and a re-fetched copy."""
+        target = add_dataset_func
+        image_path = create_image_file(tmp_path / "ragflow_test.png")
+        avatar_uri = f"data:image/png;base64,{encode_avatar(image_path)}"
+
+        target.update({"avatar": avatar_uri})
+        assert target.avatar == avatar_uri, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.avatar == avatar_uri, str(fetched)
+
+    @pytest.mark.p2
+    def test_avatar_exceeds_limit_length(self, add_dataset_func):
+        """A 65,536-character avatar must be rejected with the 65535-character limit message."""
+        target = add_dataset_func
+        oversized_avatar = "a" * 65536
+        with pytest.raises(Exception) as excinfo:
+            target.update({"avatar": oversized_avatar})
+
+        raised_text = str(excinfo.value)
+        assert "String should have at most 65535 characters" in raised_text, raised_text
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "avatar_prefix, expected_message",
+        [
+            ("", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
+            ("invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"),
+            ("data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"),
+        ],
+        # The last two ids are now aligned with their cases: the third case has an
+        # invalid prefix and the fourth an unsupported type (they were mislabelled).
+        ids=["empty_prefix", "missing_comma", "invalid_mine_prefix", "unsupported_mine_type"],
+    )
+    def test_avatar_invalid_prefix(self, add_dataset_func, tmp_path, avatar_prefix, expected_message):
+        """Prepend a malformed data-URI prefix to an encoded image and expect the matching validation message."""
+        dataset = add_dataset_func
+        fn = create_image_file(tmp_path / "ragflow_test.png")
+        with pytest.raises(Exception) as excinfo:
+            dataset.update({"avatar": f"{avatar_prefix}{encode_avatar(fn)}"})
+        assert expected_message in str(excinfo.value), str(excinfo.value)
+
+    @pytest.mark.p3
+    def test_avatar_none(self, client, add_dataset_func):
+        """Setting avatar to None must be accepted; the local object and a re-fetched copy must report None."""
+        target = add_dataset_func
+        target.update({"avatar": None})
+        assert target.avatar is None, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.avatar is None, str(fetched)
+
+    @pytest.mark.p2
+    def test_description(self, client, add_dataset_func):
+        """Set a description and check the local object and a re-fetched copy."""
+        target = add_dataset_func
+        target.update({"description": "description"})
+        assert target.description == "description", str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.description == "description", str(fetched)
+
+    @pytest.mark.p2
+    def test_description_exceeds_limit_length(self, add_dataset_func):
+        """A 65,536-character description must be rejected with the 65535-character limit message."""
+        target = add_dataset_func
+        oversized_description = "a" * 65536
+        with pytest.raises(Exception) as excinfo:
+            target.update({"description": oversized_description})
+
+        raised_text = str(excinfo.value)
+        assert "String should have at most 65535 characters" in raised_text, raised_text
+
+    @pytest.mark.p3
+    def test_description_none(self, client, add_dataset_func):
+        """Setting description to None must be accepted; the local object and a re-fetched copy must report None."""
+        target = add_dataset_func
+        target.update({"description": None})
+        assert target.description is None, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.description is None, str(fetched)
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "embedding_model",
+        [
+            "BAAI/bge-large-zh-v1.5@BAAI",
+            "maidalun1020/bce-embedding-base_v1@Youdao",
+            "embedding-3@ZHIPU-AI",
+        ],
+        ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
+    )
+    def test_embedding_model(self, client, add_dataset_func, embedding_model):
+        """Switch the embedding model and check the local object and a re-fetched copy."""
+        target = add_dataset_func
+        target.update({"embedding_model": embedding_model})
+        assert target.embedding_model == embedding_model, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.embedding_model == embedding_model, str(fetched)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            ("unknown_llm_name", "unknown@ZHIPU-AI"),
+            ("unknown_llm_factory", "embedding-3@unknown"),
+            ("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen"),
+            ("tenant_no_auth", "text-embedding-3-small@OpenAI"),
+        ],
+        ids=["unknown_llm_name", "unknown_llm_factory", "tenant_no_auth_default_tenant_llm", "tenant_no_auth"],
+    )
+    def test_embedding_model_invalid(self, add_dataset_func, name, embedding_model):
+        """Unknown models must raise "Unsupported model"; the tenant_no_auth cases must raise "Unauthorized model"."""
+        target = add_dataset_func
+        with pytest.raises(Exception) as excinfo:
+            target.update({"name": name, "embedding_model": embedding_model})
+
+        # The case name selects which exact message is expected.
+        if "tenant_no_auth" in name:
+            expected_text = f"Unauthorized model: <{embedding_model}>"
+        else:
+            expected_text = f"Unsupported model: <{embedding_model}>"
+        raised_text = str(excinfo.value)
+        assert raised_text == expected_text, raised_text
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, embedding_model",
+        [
+            pytest.param("missing_at", "BAAI/bge-large-zh-v1.5BAAI", id="missing_at"),
+            pytest.param("missing_model_name", "@BAAI", id="empty_model_name"),
+            pytest.param("missing_provider", "BAAI/bge-large-zh-v1.5@", id="empty_provider"),
+            pytest.param("whitespace_only_model_name", " @BAAI", id="whitespace_only_model_name"),
+            pytest.param("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ ", id="whitespace_only_provider"),
+        ],
+    )
+    def test_embedding_model_format(self, add_dataset_func, name, embedding_model):
+        """Reject embedding model identifiers that are not ``<model_name>@<provider>``.
+
+        The ``missing_at`` case expects the format message; every other case
+        expects the non-empty-parts message.
+        """
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"name": name, "embedding_model": embedding_model})
+        error_msg = str(excinfo.value)
+        expected_fragment = (
+            "Embedding model identifier must follow <model_name>@<provider> format"
+            if name == "missing_at"
+            else "Both model_name and provider must be non-empty strings"
+        )
+        assert expected_fragment in error_msg, error_msg
+
+    @pytest.mark.p2
+    def test_embedding_model_none(self, add_dataset_func):
+        """Updating ``embedding_model`` to ``None`` must raise with the valid-string message."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"embedding_model": None})
+        message = str(excinfo.value)
+        assert "Input should be a valid string" in message, message
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "permission",
+        [
+            pytest.param("me", id="me"),
+            pytest.param("team", id="team"),
+            pytest.param("ME", id="me_uppercase"),
+            pytest.param("TEAM", id="team_uppercase"),
+            pytest.param(" ME ", id="whitespace"),
+        ],
+    )
+    def test_permission(self, client, add_dataset_func, permission):
+        """Update ``permission`` and check the normalized value locally and after re-fetching.
+
+        The value read back is expected to be the input lower-cased with
+        surrounding whitespace removed.
+        """
+        # Computed once so the local and re-fetched checks compare against the same value.
+        expected = permission.lower().strip()
+
+        dataset = add_dataset_func
+        dataset.update({"permission": permission})
+        assert dataset.permission == expected, str(dataset)
+
+        retrieved_dataset = client.get_dataset(name=dataset.name)
+        assert retrieved_dataset.permission == expected, str(retrieved_dataset)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "permission",
+        [
+            pytest.param("", id="empty"),
+            pytest.param("unknown", id="unknown"),
+            pytest.param([], id="type_error"),
+        ],
+    )
+    def test_permission_invalid(self, add_dataset_func, permission):
+        """Reject ``permission`` values other than the two accepted choices."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"permission": permission})
+        message = str(excinfo.value)
+        assert "Input should be 'me' or 'team'" in message, message
+
+    @pytest.mark.p3
+    def test_permission_none(self, add_dataset_func):
+        """Updating ``permission`` to ``None`` must raise with the 'me' or 'team' message."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"permission": None})
+        message = str(excinfo.value)
+        assert "Input should be 'me' or 'team'" in message, message
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "chunk_method",
+        [
+            # Each case uses the chunk method name itself as its test id.
+            pytest.param(method, id=method)
+            for method in (
+                "naive",
+                "book",
+                "email",
+                "laws",
+                "manual",
+                "one",
+                "paper",
+                "picture",
+                "presentation",
+                "qa",
+                "table",
+                "tag",
+            )
+        ],
+    )
+    def test_chunk_method(self, client, add_dataset_func, chunk_method):
+        """Update ``chunk_method`` and check the value locally and after re-fetching by name."""
+        target = add_dataset_func
+        target.update({"chunk_method": chunk_method})
+        assert target.chunk_method == chunk_method, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.chunk_method == chunk_method, str(fetched)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "chunk_method",
+        [
+            pytest.param("", id="empty"),
+            pytest.param("unknown", id="unknown"),
+            pytest.param([], id="type_error"),
+        ],
+    )
+    def test_chunk_method_invalid(self, add_dataset_func, chunk_method):
+        """Reject ``chunk_method`` values outside the list of accepted names."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"chunk_method": chunk_method})
+        message = str(excinfo.value)
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in message, message
+
+    @pytest.mark.p3
+    def test_chunk_method_none(self, add_dataset_func):
+        """Updating ``chunk_method`` to ``None`` must raise with the accepted-names message."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"chunk_method": None})
+        message = str(excinfo.value)
+        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in message, message
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "pagerank",
+        [
+            pytest.param(0, id="min"),
+            pytest.param(50, id="mid"),
+            pytest.param(100, id="max"),
+        ],
+    )
+    def test_pagerank(self, client, add_dataset_func, pagerank):
+        """Update ``pagerank`` and check the value locally and after re-fetching by name."""
+        target = add_dataset_func
+        target.update({"pagerank": pagerank})
+        assert target.pagerank == pagerank, str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert fetched.pagerank == pagerank, str(fetched)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "pagerank, expected_message",
+        [
+            pytest.param(-1, "Input should be greater than or equal to 0", id="min_limit"),
+            pytest.param(101, "Input should be less than or equal to 100", id="max_limit"),
+        ],
+    )
+    def test_pagerank_invalid(self, add_dataset_func, pagerank, expected_message):
+        """Reject ``pagerank`` values just outside the 0..100 bounds exercised here."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"pagerank": pagerank})
+        message = str(excinfo.value)
+        assert expected_message in message, message
+
+    @pytest.mark.p3
+    def test_pagerank_none(self, add_dataset_func):
+        """Updating ``pagerank`` to ``None`` must raise with the valid-integer message."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update({"pagerank": None})
+        message = str(excinfo.value)
+        assert "Input should be a valid integer" in message, message
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "parser_config",
+        [
+            # Each value is paired with its id so the two cannot drift out of alignment.
+            pytest.param({"auto_keywords": 0}, id="auto_keywords_min"),
+            pytest.param({"auto_keywords": 16}, id="auto_keywords_mid"),
+            pytest.param({"auto_keywords": 32}, id="auto_keywords_max"),
+            pytest.param({"auto_questions": 0}, id="auto_questions_min"),
+            pytest.param({"auto_questions": 5}, id="auto_questions_mid"),
+            pytest.param({"auto_questions": 10}, id="auto_questions_max"),
+            pytest.param({"chunk_token_num": 1}, id="chunk_token_num_min"),
+            pytest.param({"chunk_token_num": 1024}, id="chunk_token_num_mid"),
+            pytest.param({"chunk_token_num": 2048}, id="chunk_token_num_max"),
+            pytest.param({"delimiter": "\n"}, id="delimiter"),
+            pytest.param({"delimiter": " "}, id="delimiter_space"),
+            pytest.param({"html4excel": True}, id="html4excel_true"),
+            pytest.param({"html4excel": False}, id="html4excel_false"),
+            pytest.param({"layout_recognize": "DeepDOC"}, id="layout_recognize_DeepDOC"),
+            pytest.param({"layout_recognize": "Plain Text"}, id="layout_recognize_plain_text"),
+            pytest.param({"tag_kb_ids": ["1", "2"]}, id="tag_kb_ids"),
+            pytest.param({"topn_tags": 1}, id="topn_tags_min"),
+            pytest.param({"topn_tags": 5}, id="topn_tags_mid"),
+            pytest.param({"topn_tags": 10}, id="topn_tags_max"),
+            pytest.param({"filename_embd_weight": 0.1}, id="filename_embd_weight_min"),
+            pytest.param({"filename_embd_weight": 0.5}, id="filename_embd_weight_mid"),
+            pytest.param({"filename_embd_weight": 1.0}, id="filename_embd_weight_max"),
+            pytest.param({"task_page_size": 1}, id="task_page_size_min"),
+            pytest.param({"task_page_size": None}, id="task_page_size_None"),
+            pytest.param({"pages": [[1, 100]]}, id="pages"),
+            pytest.param({"pages": None}, id="pages_none"),
+            pytest.param({"graphrag": {"use_graphrag": True}}, id="graphrag_true"),
+            pytest.param({"graphrag": {"use_graphrag": False}}, id="graphrag_false"),
+            pytest.param({"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}}, id="graphrag_entity_types"),
+            pytest.param({"graphrag": {"method": "general"}}, id="graphrag_method_general"),
+            pytest.param({"graphrag": {"method": "light"}}, id="graphrag_method_light"),
+            pytest.param({"graphrag": {"community": True}}, id="graphrag_community_true"),
+            pytest.param({"graphrag": {"community": False}}, id="graphrag_community_false"),
+            pytest.param({"graphrag": {"resolution": True}}, id="graphrag_resolution_true"),
+            pytest.param({"graphrag": {"resolution": False}}, id="graphrag_resolution_false"),
+            pytest.param({"raptor": {"use_raptor": True}}, id="raptor_true"),
+            pytest.param({"raptor": {"use_raptor": False}}, id="raptor_false"),
+            pytest.param({"raptor": {"prompt": "Who are you?"}}, id="raptor_prompt"),
+            pytest.param({"raptor": {"max_token": 1}}, id="raptor_max_token_min"),
+            pytest.param({"raptor": {"max_token": 1024}}, id="raptor_max_token_mid"),
+            pytest.param({"raptor": {"max_token": 2048}}, id="raptor_max_token_max"),
+            pytest.param({"raptor": {"threshold": 0.0}}, id="raptor_threshold_min"),
+            pytest.param({"raptor": {"threshold": 0.5}}, id="raptor_threshold_mid"),
+            pytest.param({"raptor": {"threshold": 1.0}}, id="raptor_threshold_max"),
+            pytest.param({"raptor": {"max_cluster": 1}}, id="raptor_max_cluster_min"),
+            pytest.param({"raptor": {"max_cluster": 512}}, id="raptor_max_cluster_mid"),
+            pytest.param({"raptor": {"max_cluster": 1024}}, id="raptor_max_cluster_max"),
+            pytest.param({"raptor": {"random_seed": 0}}, id="raptor_random_seed_min"),
+        ],
+    )
+    def test_parser_config(self, client, add_dataset_func, parser_config):
+        """Update ``parser_config`` and check every submitted key locally and after re-fetching.
+
+        Top-level keys are compared directly; for keys whose value is a dict
+        (the ``graphrag`` and ``raptor`` cases) each nested key is compared
+        through a dotted attribute path.
+        """
+
+        def assert_config_applied(owner):
+            # Compare each submitted key against ``owner.parser_config``; ``owner`` is
+            # stringified only to give a readable message when an assertion fails.
+            for key, value in parser_config.items():
+                if isinstance(value, dict):
+                    for sub_key, sub_value in value.items():
+                        assert attrgetter(f"{key}.{sub_key}")(owner.parser_config) == sub_value, str(owner)
+                else:
+                    assert attrgetter(key)(owner.parser_config) == value, str(owner)
+
+        dataset = add_dataset_func
+        dataset.update({"parser_config": parser_config})
+        assert_config_applied(dataset)
+
+        # Same checks against a copy freshly fetched by name.
+        assert_config_applied(client.get_dataset(name=dataset.name))
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "parser_config, expected_message",
+        [
+            # Each case is paired with its id so the two cannot drift out of alignment.
+            pytest.param({"auto_keywords": -1}, "Input should be greater than or equal to 0", id="auto_keywords_min_limit"),
+            pytest.param({"auto_keywords": 33}, "Input should be less than or equal to 32", id="auto_keywords_max_limit"),
+            pytest.param({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part", id="auto_keywords_float_not_allowed"),
+            pytest.param({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer", id="auto_keywords_type_invalid"),
+            pytest.param({"auto_questions": -1}, "Input should be greater than or equal to 0", id="auto_questions_min_limit"),
+            pytest.param({"auto_questions": 11}, "Input should be less than or equal to 10", id="auto_questions_max_limit"),
+            pytest.param({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part", id="auto_questions_float_not_allowed"),
+            pytest.param({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer", id="auto_questions_type_invalid"),
+            pytest.param({"chunk_token_num": 0}, "Input should be greater than or equal to 1", id="chunk_token_num_min_limit"),
+            pytest.param({"chunk_token_num": 2049}, "Input should be less than or equal to 2048", id="chunk_token_num_max_limit"),
+            pytest.param({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part", id="chunk_token_num_float_not_allowed"),
+            pytest.param({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer", id="chunk_token_num_type_invalid"),
+            pytest.param({"delimiter": ""}, "String should have at least 1 character", id="delimiter_empty"),
+            pytest.param({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input", id="html4excel_type_invalid"),
+            pytest.param({"tag_kb_ids": "1,2"}, "Input should be a valid list", id="tag_kb_ids_not_list"),
+            pytest.param({"tag_kb_ids": [1, 2]}, "Input should be a valid string", id="tag_kb_ids_int_in_list"),
+            pytest.param({"topn_tags": 0}, "Input should be greater than or equal to 1", id="topn_tags_min_limit"),
+            pytest.param({"topn_tags": 11}, "Input should be less than or equal to 10", id="topn_tags_max_limit"),
+            pytest.param({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part", id="topn_tags_float_not_allowed"),
+            pytest.param({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer", id="topn_tags_type_invalid"),
+            pytest.param({"filename_embd_weight": -1}, "Input should be greater than or equal to 0", id="filename_embd_weight_min_limit"),
+            pytest.param({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1", id="filename_embd_weight_max_limit"),
+            pytest.param({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number", id="filename_embd_weight_type_invalid"),
+            pytest.param({"task_page_size": 0}, "Input should be greater than or equal to 1", id="task_page_size_min_limit"),
+            pytest.param({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part", id="task_page_size_float_not_allowed"),
+            pytest.param({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer", id="task_page_size_type_invalid"),
+            pytest.param({"pages": "1,2"}, "Input should be a valid list", id="pages_not_list"),
+            pytest.param({"pages": ["1,2"]}, "Input should be a valid list", id="pages_not_list_in_list"),
+            pytest.param({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer", id="pages_not_int_list"),
+            pytest.param({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input", id="graphrag_type_invalid"),
+            pytest.param({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list", id="graphrag_entity_types_not_list"),
+            # Was "nput should be a valid string" (leading "I" missing).
+            pytest.param({"graphrag": {"entity_types": [1, 2]}}, "Input should be a valid string", id="graphrag_entity_types_not_str_in_list"),
+            pytest.param({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'", id="graphrag_method_unknown"),
+            pytest.param({"graphrag": {"method": None}}, "Input should be 'light' or 'general'", id="graphrag_method_none"),
+            pytest.param({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input", id="graphrag_community_type_invalid"),
+            pytest.param({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input", id="graphrag_resolution_type_invalid"),
+            pytest.param({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input", id="raptor_type_invalid"),
+            pytest.param({"raptor": {"prompt": ""}}, "String should have at least 1 character", id="raptor_prompt_empty"),
+            pytest.param({"raptor": {"prompt": " "}}, "String should have at least 1 character", id="raptor_prompt_space"),
+            pytest.param({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1", id="raptor_max_token_min_limit"),
+            pytest.param({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048", id="raptor_max_token_max_limit"),
+            pytest.param({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part", id="raptor_max_token_float_not_allowed"),
+            pytest.param({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer", id="raptor_max_token_type_invalid"),
+            pytest.param({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0", id="raptor_threshold_min_limit"),
+            pytest.param({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1", id="raptor_threshold_max_limit"),
+            pytest.param({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number", id="raptor_threshold_type_invalid"),
+            pytest.param({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1", id="raptor_max_cluster_min_limit"),
+            pytest.param({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024", id="raptor_max_cluster_max_limit"),
+            # Was truncated to "...fractional par".
+            pytest.param({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional part", id="raptor_max_cluster_float_not_allowed"),
+            pytest.param({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer", id="raptor_max_cluster_type_invalid"),
+            pytest.param({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0", id="raptor_random_seed_min_limit"),
+            pytest.param({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part", id="raptor_random_seed_float_not_allowed"),
+            pytest.param({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer", id="raptor_random_seed_type_invalid"),
+            # This case exercises the overall size limit, not a type error, so the id says so.
+            pytest.param({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)", id="parser_config_exceeds_size_limit"),
+        ],
+    )
+    def test_parser_config_invalid(self, add_dataset_func, parser_config, expected_message):
+        """Reject invalid ``parser_config`` payloads.
+
+        Each case must raise an exception whose text contains
+        ``expected_message``.
+        """
+        dataset = add_dataset_func
+        with pytest.raises(Exception) as excinfo:
+            dataset.update({"parser_config": parser_config})
+        error_msg = str(excinfo.value)
+        assert expected_message in error_msg, error_msg
+
+    @pytest.mark.p2
+    def test_parser_config_empty(self, client, add_dataset_func):
+        """Update with an empty ``parser_config`` and compare against a reference config.
+
+        The comparison is on the string form of the config, both on the local
+        object and on a copy re-fetched by name.
+        """
+        # Presumably the defaults for the fixture's chunk method -- confirm against the server.
+        reference_fields = {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }
+        reference = DataSet.ParserConfig(client, reference_fields)
+
+        target = add_dataset_func
+        target.update({"parser_config": {}})
+        assert str(target.parser_config) == str(reference), str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert str(fetched.parser_config) == str(reference), str(fetched)
+
+    @pytest.mark.p3
+    def test_parser_config_none(self, client, add_dataset_func):
+        """Update with ``parser_config`` set to ``None`` and compare against a reference config.
+
+        The comparison is on the string form of the config, both on the local
+        object and on a copy re-fetched by name.
+        """
+        # Presumably the defaults for the fixture's chunk method -- confirm against the server.
+        reference_fields = {
+            "chunk_token_num": 128,
+            "delimiter": r"\n",
+            "html4excel": False,
+            "layout_recognize": "DeepDOC",
+            "raptor": {"use_raptor": False},
+        }
+        reference = DataSet.ParserConfig(client, reference_fields)
+
+        target = add_dataset_func
+        target.update({"parser_config": None})
+        assert str(target.parser_config) == str(reference), str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert str(fetched.parser_config) == str(reference), str(fetched)
+
+    @pytest.mark.p3
+    def test_parser_config_empty_with_chunk_method_change(self, client, add_dataset_func):
+        """Switch ``chunk_method`` to ``qa`` with an empty ``parser_config``.
+
+        The resulting config, compared by string form, must match a reference
+        that only carries ``raptor.use_raptor = False``.
+        """
+        reference = DataSet.ParserConfig(client, {"raptor": {"use_raptor": False}})
+
+        target = add_dataset_func
+        target.update({"chunk_method": "qa", "parser_config": {}})
+        assert str(target.parser_config) == str(reference), str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert str(fetched.parser_config) == str(reference), str(fetched)
+
+    @pytest.mark.p3
+    def test_parser_config_unset_with_chunk_method_change(self, client, add_dataset_func):
+        """Switch ``chunk_method`` to ``qa`` without sending ``parser_config`` at all.
+
+        The resulting config, compared by string form, must match a reference
+        that only carries ``raptor.use_raptor = False``.
+        """
+        reference = DataSet.ParserConfig(client, {"raptor": {"use_raptor": False}})
+
+        target = add_dataset_func
+        target.update({"chunk_method": "qa"})
+        assert str(target.parser_config) == str(reference), str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert str(fetched.parser_config) == str(reference), str(fetched)
+
+    @pytest.mark.p3
+    def test_parser_config_none_with_chunk_method_change(self, client, add_dataset_func):
+        """Switch ``chunk_method`` to ``qa`` with ``parser_config`` set to ``None``.
+
+        The resulting config, compared by string form, must match a reference
+        that only carries ``raptor.use_raptor = False``.
+        """
+        reference = DataSet.ParserConfig(client, {"raptor": {"use_raptor": False}})
+
+        target = add_dataset_func
+        target.update({"chunk_method": "qa", "parser_config": None})
+        assert str(target.parser_config) == str(reference), str(target)
+
+        fetched = client.get_dataset(name=target.name)
+        assert str(fetched.parser_config) == str(reference), str(fetched)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {"id": "id"},
+            {"tenant_id": "e57c1966f99211efb41e9e45646e0111"},
+            {"created_by": "created_by"},
+            {"create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"create_time": 1741671443322},
+            {"update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"update_time": 1741671443339},
+            {"document_count": 1},
+            {"chunk_count": 1},
+            {"token_num": 1},
+            {"status": "1"},
+            {"unknown_field": "unknown_field"},
+        ],
+    )
+    def test_field_unsupported(self, add_dataset_func, payload):
+        """Reject update payloads containing any of the listed fields, including an unknown one."""
+        with pytest.raises(Exception) as excinfo:
+            add_dataset_func.update(payload)
+        message = str(excinfo.value)
+        assert "Extra inputs are not permitted" in message, message
+
+    @pytest.mark.p2
+    def test_field_unset(self, client, add_dataset_func):
+        """Rename the dataset only and check that the other compared fields are unchanged."""
+        dataset = add_dataset_func
+        # Snapshot the dataset as fetched by name before the update, for later comparison.
+        original_dataset = client.get_dataset(name=dataset.name)
+
+        dataset.update({"name": "default_unset"})
+
+        # Re-fetch under the new name; fields not sent in the update should match the snapshot.
+        updated_dataset = client.get_dataset(name="default_unset")
+        assert updated_dataset.avatar == original_dataset.avatar, str(updated_dataset)
+        assert updated_dataset.description == original_dataset.description, str(updated_dataset)
+        assert updated_dataset.embedding_model == original_dataset.embedding_model, str(updated_dataset)
+        assert updated_dataset.permission == original_dataset.permission, str(updated_dataset)
+        assert updated_dataset.chunk_method == original_dataset.chunk_method, str(updated_dataset)
+        assert updated_dataset.pagerank == original_dataset.pagerank, str(updated_dataset)
+        # parser_config is compared through its string form rather than with == on the objects.
+        assert str(updated_dataset.parser_config) == str(original_dataset.parser_config), str(updated_dataset)