mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Test: add sdk Document test cases (#8094)
### What problem does this PR solve? Add sdk document test cases ### Type of change - [x] Add test cases
This commit is contained in:
@ -41,7 +41,7 @@ class Document(Base):
|
||||
self.progress = 0.0
|
||||
self.progress_msg = ""
|
||||
self.process_begin_at = None
|
||||
self.process_duration = 0.0
|
||||
self.process_duation = 0.0
|
||||
self.run = "0"
|
||||
self.status = "1"
|
||||
for k in list(res_dict.keys()):
|
||||
|
||||
@ -85,6 +85,7 @@ class TestCapability:
|
||||
futures = [executor.submit(create_dataset, api_key, {"name": f"dataset_{i}"}) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("clear_datasets")
|
||||
|
||||
@ -93,6 +93,7 @@ class TestCapability:
|
||||
futures = [executor.submit(delete_datasets, api_key, {"ids": ids[i : i + 1]}) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
|
||||
class TestDatasetsDelete:
|
||||
|
||||
@ -49,6 +49,7 @@ class TestCapability:
|
||||
futures = [executor.submit(list_datasets, api_key) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("add_datasets")
|
||||
|
||||
@ -95,6 +95,7 @@ class TestCapability:
|
||||
futures = [executor.submit(update_dataset, api_key, dataset_id, {"name": f"dataset_{i}"}) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
|
||||
class TestDatasetUpdate:
|
||||
|
||||
@ -25,7 +25,7 @@ def add_document_func(request, api_key, add_dataset, ragflow_tmp_dir):
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, 1, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
delete_documents(api_key, dataset_id, {"ids": document_ids})
|
||||
delete_documents(api_key, dataset_id, {"ids": None})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset_id, document_ids[0]
|
||||
@ -37,7 +37,7 @@ def add_documents(request, api_key, add_dataset, ragflow_tmp_dir):
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, 5, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
delete_documents(api_key, dataset_id, {"ids": document_ids})
|
||||
delete_documents(api_key, dataset_id, {"ids": None})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset_id, document_ids
|
||||
@ -49,7 +49,7 @@ def add_documents_func(request, api_key, add_dataset_func, ragflow_tmp_dir):
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, 3, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
delete_documents(api_key, dataset_id, {"ids": document_ids})
|
||||
delete_documents(api_key, dataset_id, {"ids": None})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset_id, document_ids
|
||||
|
||||
@ -13,7 +13,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
import pytest
|
||||
from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documents, list_documents
|
||||
@ -148,9 +149,9 @@ class TestDocumentsDeletion:
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_deletion(api_key, add_dataset, tmp_path):
|
||||
documents_num = 100
|
||||
count = 100
|
||||
dataset_id = add_dataset
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, documents_num, tmp_path)
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, count, tmp_path)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [
|
||||
@ -160,10 +161,11 @@ def test_concurrent_deletion(api_key, add_dataset, tmp_path):
|
||||
dataset_id,
|
||||
{"ids": document_ids[i : i + 1]},
|
||||
)
|
||||
for i in range(documents_num)
|
||||
for i in range(count)
|
||||
]
|
||||
responses = [f.result() for f in futures]
|
||||
assert all(r["code"] == 0 for r in responses)
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import INVALID_API_TOKEN, list_documents
|
||||
@ -342,11 +342,13 @@ class TestDocumentsList:
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_list(self, api_key, add_documents):
|
||||
dataset_id, _ = add_documents
|
||||
count = 100
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(100)]
|
||||
responses = [f.result() for f in futures]
|
||||
assert all(r["code"] == 0 for r in responses)
|
||||
futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_invalid_params(self, api_key, add_documents):
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import INVALID_API_TOKEN, bulk_upload_documents, list_documents, parse_documents
|
||||
@ -195,9 +195,9 @@ def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
|
||||
return False
|
||||
return True
|
||||
|
||||
document_num = 100
|
||||
count = 100
|
||||
dataset_id = add_dataset_func
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
|
||||
document_ids = bulk_upload_documents(api_key, dataset_id, count, tmp_path)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [
|
||||
@ -207,11 +207,12 @@ def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
|
||||
dataset_id,
|
||||
{"document_ids": document_ids[i : i + 1]},
|
||||
)
|
||||
for i in range(document_num)
|
||||
for i in range(count)
|
||||
]
|
||||
responses = [f.result() for f in futures]
|
||||
assert all(r["code"] == 0 for r in responses)
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
condition(api_key, dataset_id, document_num)
|
||||
condition(api_key, dataset_id, count)
|
||||
|
||||
validate_document_details(api_key, dataset_id, document_ids)
|
||||
|
||||
@ -213,6 +213,7 @@ class TestDocumentsUpload:
|
||||
futures = [executor.submit(upload_documents, api_key, dataset_id, fps[i : i + 1]) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(futures.result()["code"] == 0 for futures in futures)
|
||||
|
||||
res = list_datasets(api_key, {"id": dataset_id})
|
||||
assert res["data"][0]["document_count"] == count
|
||||
|
||||
@ -14,10 +14,28 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from ragflow_sdk import DataSet, Document, RAGFlow
|
||||
from utils.file_utils import create_txt_file
|
||||
|
||||
|
||||
# DATASET MANAGEMENT
|
||||
def batch_create_datasets(client, num):
|
||||
def batch_create_datasets(client: RAGFlow, num: int) -> list[DataSet]:
|
||||
datasets = []
|
||||
for i in range(num):
|
||||
dataset = client.create_dataset(name=f"dataset_{i}")
|
||||
datasets.append(dataset)
|
||||
return datasets
|
||||
|
||||
|
||||
# FILE MANAGEMENT WITHIN DATASET
|
||||
def bulk_upload_documents(dataset: DataSet, num: int, tmp_path: Path) -> list[Document]:
|
||||
document_infos = []
|
||||
for i in range(num):
|
||||
fp = create_txt_file(tmp_path / f"ragflow_test_upload_{i}.txt")
|
||||
with fp.open("rb") as f:
|
||||
blob = f.read()
|
||||
document_infos.append({"display_name": fp.name, "blob": blob})
|
||||
|
||||
return dataset.upload_documents(document_infos)
|
||||
|
||||
@ -14,32 +14,109 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from common import (
|
||||
batch_create_datasets,
|
||||
bulk_upload_documents,
|
||||
)
|
||||
from configs import HOST_ADDRESS, VERSION
|
||||
from ragflow_sdk import RAGFlow
|
||||
from pytest import FixtureRequest
|
||||
from ragflow_sdk import DataSet, RAGFlow
|
||||
from utils import wait_for
|
||||
from utils.file_utils import (
|
||||
create_docx_file,
|
||||
create_eml_file,
|
||||
create_excel_file,
|
||||
create_html_file,
|
||||
create_image_file,
|
||||
create_json_file,
|
||||
create_md_file,
|
||||
create_pdf_file,
|
||||
create_ppt_file,
|
||||
create_txt_file,
|
||||
)
|
||||
|
||||
|
||||
@wait_for(30, 1, "Document parsing timeout")
|
||||
def condition(_dataset: DataSet):
|
||||
documents = DataSet.list_documents(page_size=1000)
|
||||
for document in documents:
|
||||
if document.run != "DONE":
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def generate_test_files(request, tmp_path):
|
||||
file_creators = {
|
||||
"docx": (tmp_path / "ragflow_test.docx", create_docx_file),
|
||||
"excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
|
||||
"ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
|
||||
"image": (tmp_path / "ragflow_test.png", create_image_file),
|
||||
"pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
|
||||
"txt": (tmp_path / "ragflow_test.txt", create_txt_file),
|
||||
"md": (tmp_path / "ragflow_test.md", create_md_file),
|
||||
"json": (tmp_path / "ragflow_test.json", create_json_file),
|
||||
"eml": (tmp_path / "ragflow_test.eml", create_eml_file),
|
||||
"html": (tmp_path / "ragflow_test.html", create_html_file),
|
||||
}
|
||||
|
||||
files = {}
|
||||
for file_type, (file_path, creator_func) in file_creators.items():
|
||||
if request.param in ["", file_type]:
|
||||
creator_func(file_path)
|
||||
files[file_type] = file_path
|
||||
return files
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def ragflow_tmp_dir(request, tmp_path_factory) -> Path:
|
||||
class_name = request.cls.__name__
|
||||
return tmp_path_factory.mktemp(class_name)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def client(token):
|
||||
def client(token) -> RAGFlow:
|
||||
return RAGFlow(api_key=token, base_url=HOST_ADDRESS, version=VERSION)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def clear_datasets(request, client):
|
||||
def clear_datasets(request: FixtureRequest, client: RAGFlow):
|
||||
def cleanup():
|
||||
client.delete_datasets(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def add_dataset(request: FixtureRequest, client: RAGFlow):
|
||||
def cleanup():
|
||||
client.delete_datasets(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
dataset_ids = batch_create_datasets(client, 1)
|
||||
return dataset_ids[0]
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def add_dataset_func(request, client):
|
||||
def add_dataset_func(request: FixtureRequest, client: RAGFlow) -> DataSet:
|
||||
def cleanup():
|
||||
client.delete_datasets(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
return batch_create_datasets(client, 1)[0]
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def add_document(request: FixtureRequest, add_dataset: DataSet, ragflow_tmp_dir):
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, 1, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
dataset.delete_documents(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset, documents[0]
|
||||
|
||||
@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents
|
||||
from pytest import FixtureRequest
|
||||
from ragflow_sdk import DataSet, Document
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def add_document_func(request: FixtureRequest, add_dataset: DataSet, ragflow_tmp_dir) -> tuple[DataSet, Document]:
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, 1, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
dataset.delete_documents(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset, documents[0]
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def add_documents(request: FixtureRequest, add_dataset: DataSet, ragflow_tmp_dir) -> tuple[DataSet, list[Document]]:
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, 5, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
dataset.delete_documents(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset, documents
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def add_documents_func(request: FixtureRequest, add_dataset_func: DataSet, ragflow_tmp_dir) -> tuple[DataSet, list[Document]]:
|
||||
dataset = add_dataset_func
|
||||
documents = bulk_upload_documents(dataset, 3, ragflow_tmp_dir)
|
||||
|
||||
def cleanup():
|
||||
dataset.delete_documents(ids=None)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
return dataset, documents
|
||||
@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents
|
||||
|
||||
|
||||
class TestDocumentsDeletion:
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message, remaining",
|
||||
[
|
||||
({"ids": None}, "", 0),
|
||||
({"ids": []}, "", 0),
|
||||
({"ids": ["invalid_id"]}, "Documents not found: ['invalid_id']", 3),
|
||||
({"ids": ["\n!?。;!?\"'"]}, "Documents not found: ['\\n!?。;!?\"\\'']", 3),
|
||||
("not json", "must be a mapping", 3),
|
||||
(lambda r: {"ids": r[:1]}, "", 2),
|
||||
(lambda r: {"ids": r}, "", 0),
|
||||
],
|
||||
)
|
||||
def test_basic_scenarios(
|
||||
self,
|
||||
add_documents_func,
|
||||
payload,
|
||||
expected_message,
|
||||
remaining,
|
||||
):
|
||||
dataset, documents = add_documents_func
|
||||
if callable(payload):
|
||||
payload = payload([document.id for document in documents])
|
||||
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.delete_documents(**payload)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
dataset.delete_documents(**payload)
|
||||
|
||||
documents = dataset.list_documents()
|
||||
assert len(documents) == remaining, str(documents)
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload",
|
||||
[
|
||||
lambda r: {"ids": ["invalid_id"] + r},
|
||||
lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
|
||||
lambda r: {"ids": r + ["invalid_id"]},
|
||||
],
|
||||
)
|
||||
def test_delete_partial_invalid_id(self, add_documents_func, payload):
|
||||
dataset, documents = add_documents_func
|
||||
payload = payload([document.id for document in documents])
|
||||
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.delete_documents(**payload)
|
||||
assert "Documents not found: ['invalid_id']" in str(excinfo.value), str(excinfo.value)
|
||||
|
||||
documents = dataset.list_documents()
|
||||
assert len(documents) == 0, str(documents)
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_repeated_deletion(self, add_documents_func):
|
||||
dataset, documents = add_documents_func
|
||||
document_ids = [document.id for document in documents]
|
||||
dataset.delete_documents(ids=document_ids)
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.delete_documents(ids=document_ids)
|
||||
assert "Documents not found" in str(excinfo.value), str(excinfo.value)
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_duplicate_deletion(self, add_documents_func):
|
||||
dataset, documents = add_documents_func
|
||||
document_ids = [document.id for document in documents]
|
||||
dataset.delete_documents(ids=document_ids + document_ids)
|
||||
assert len(dataset.list_documents()) == 0, str(dataset.list_documents())
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_deletion(add_dataset, tmp_path):
|
||||
count = 100
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, count, tmp_path)
|
||||
|
||||
def delete_doc(doc_id):
|
||||
dataset.delete_documents(ids=[doc_id])
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(delete_doc, doc.id) for doc in documents]
|
||||
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_delete_1k(add_dataset, tmp_path):
|
||||
count = 1_000
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, count, tmp_path)
|
||||
assert len(dataset.list_documents(page_size=count * 2)) == count
|
||||
|
||||
dataset.delete_documents(ids=[doc.id for doc in documents])
|
||||
assert len(dataset.list_documents()) == 0
|
||||
@ -0,0 +1,89 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents
|
||||
from utils import compare_by_hash
|
||||
|
||||
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"generate_test_files",
|
||||
[
|
||||
"docx",
|
||||
"excel",
|
||||
"ppt",
|
||||
"image",
|
||||
"pdf",
|
||||
"txt",
|
||||
"md",
|
||||
"json",
|
||||
"eml",
|
||||
"html",
|
||||
],
|
||||
indirect=True,
|
||||
)
|
||||
def test_file_type_validation(add_dataset, generate_test_files, request):
|
||||
dataset = add_dataset
|
||||
fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
|
||||
with fp.open("rb") as f:
|
||||
blob = f.read()
|
||||
|
||||
documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
|
||||
|
||||
for document in documents:
|
||||
with fp.with_stem("ragflow_test_download").open("wb") as f:
|
||||
f.write(document.download())
|
||||
|
||||
assert compare_by_hash(fp, fp.with_stem("ragflow_test_download"))
|
||||
|
||||
|
||||
class TestDocumentDownload:
|
||||
@pytest.mark.p3
|
||||
def test_same_file_repeat(self, add_documents, tmp_path, ragflow_tmp_dir):
|
||||
num = 5
|
||||
_, documents = add_documents
|
||||
|
||||
for i in range(num):
|
||||
download_path = tmp_path / f"ragflow_test_download_{i}.txt"
|
||||
with download_path.open("wb") as f:
|
||||
f.write(documents[0].download())
|
||||
assert compare_by_hash(ragflow_tmp_dir / "ragflow_test_upload_0.txt", download_path), f"Downloaded file {i} does not match original"
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_download(add_dataset, tmp_path):
|
||||
count = 20
|
||||
dataset = add_dataset
|
||||
documents = bulk_upload_documents(dataset, count, tmp_path)
|
||||
|
||||
def download_doc(document, i):
|
||||
download_path = tmp_path / f"ragflow_test_download_{i}.txt"
|
||||
with download_path.open("wb") as f:
|
||||
f.write(document.download())
|
||||
# assert compare_by_hash(tmp_path / f"ragflow_test_upload_{i}.txt", download_path), str(download_path)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(download_doc, documents[i], i) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
|
||||
for i in range(count):
|
||||
assert compare_by_hash(
|
||||
tmp_path / f"ragflow_test_upload_{i}.txt",
|
||||
tmp_path / f"ragflow_test_download_{i}.txt",
|
||||
)
|
||||
@ -0,0 +1,247 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestDocumentsList:
|
||||
@pytest.mark.p1
|
||||
def test_default(self, add_documents):
|
||||
dataset, _ = add_documents
|
||||
documents = dataset.list_documents()
|
||||
assert len(documents) == 5, str(documents)
|
||||
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_page_size, expected_message",
|
||||
[
|
||||
({"page": None, "page_size": 2}, 2, "not instance of"),
|
||||
({"page": 0, "page_size": 2}, 2, ""),
|
||||
({"page": 2, "page_size": 2}, 2, ""),
|
||||
({"page": 3, "page_size": 2}, 1, ""),
|
||||
({"page": "3", "page_size": 2}, 1, "not instance of"),
|
||||
pytest.param(
|
||||
{"page": -1, "page_size": 2},
|
||||
0,
|
||||
"Invalid page number",
|
||||
marks=pytest.mark.skip(reason="issues/5851"),
|
||||
),
|
||||
pytest.param(
|
||||
{"page": "a", "page_size": 2},
|
||||
0,
|
||||
"Invalid page value",
|
||||
marks=pytest.mark.skip(reason="issues/5851"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_page(self, add_documents, params, expected_page_size, expected_message):
|
||||
dataset, _ = add_documents
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_page_size, str(documents)
|
||||
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_page_size, expected_message",
|
||||
[
|
||||
({"page_size": None}, 5, "not instance of"),
|
||||
({"page_size": 0}, 0, ""),
|
||||
({"page_size": 1}, 1, ""),
|
||||
({"page_size": 6}, 5, ""),
|
||||
({"page_size": "1"}, 1, "not instance of"),
|
||||
pytest.param(
|
||||
{"page_size": -1},
|
||||
0,
|
||||
"Invalid page size",
|
||||
marks=pytest.mark.skip(reason="issues/5851"),
|
||||
),
|
||||
pytest.param(
|
||||
{"page_size": "a"},
|
||||
0,
|
||||
"Invalid page size value",
|
||||
marks=pytest.mark.skip(reason="issues/5851"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_page_size(self, add_documents, params, expected_page_size, expected_message):
|
||||
dataset, _ = add_documents
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_page_size, str(documents)
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_message",
|
||||
[
|
||||
({"orderby": None}, "not instance of"),
|
||||
({"orderby": "create_time"}, ""),
|
||||
({"orderby": "update_time"}, ""),
|
||||
pytest.param({"orderby": "name", "desc": "False"}, "", marks=pytest.mark.skip(reason="issues/5851")),
|
||||
pytest.param({"orderby": "unknown"}, "orderby should be create_time or update_time", marks=pytest.mark.skip(reason="issues/5851")),
|
||||
],
|
||||
)
|
||||
def test_orderby(self, add_documents, params, expected_message):
|
||||
dataset, _ = add_documents
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
dataset.list_documents(**params)
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_message",
|
||||
[
|
||||
({"desc": None}, "not instance of"),
|
||||
({"desc": "true"}, "not instance of"),
|
||||
({"desc": "True"}, "not instance of"),
|
||||
({"desc": True}, ""),
|
||||
pytest.param({"desc": "false"}, "", marks=pytest.mark.skip(reason="issues/5851")),
|
||||
({"desc": "False"}, "not instance of"),
|
||||
({"desc": False}, ""),
|
||||
({"desc": "False", "orderby": "update_time"}, "not instance of"),
|
||||
pytest.param({"desc": "unknown"}, "desc should be true or false", marks=pytest.mark.skip(reason="issues/5851")),
|
||||
],
|
||||
)
|
||||
def test_desc(self, add_documents, params, expected_message):
|
||||
dataset, _ = add_documents
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
dataset.list_documents(**params)
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_num",
|
||||
[
|
||||
({"keywords": None}, 5),
|
||||
({"keywords": ""}, 5),
|
||||
({"keywords": "0"}, 1),
|
||||
({"keywords": "ragflow_test_upload"}, 5),
|
||||
({"keywords": "unknown"}, 0),
|
||||
],
|
||||
)
|
||||
def test_keywords(self, add_documents, params, expected_num):
|
||||
dataset, _ = add_documents
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_num, str(documents)
|
||||
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"params, expected_num, expected_message",
|
||||
[
|
||||
({"name": None}, 5, ""),
|
||||
({"name": ""}, 5, ""),
|
||||
({"name": "ragflow_test_upload_0.txt"}, 1, ""),
|
||||
({"name": "unknown.txt"}, 0, "You don't own the document unknown.txt"),
|
||||
],
|
||||
)
|
||||
def test_name(self, add_documents, params, expected_num, expected_message):
|
||||
dataset, _ = add_documents
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_num, str(documents)
|
||||
if params["name"] not in [None, ""]:
|
||||
assert documents[0].name == params["name"], str(documents)
|
||||
|
||||
@pytest.mark.p1
|
||||
@pytest.mark.parametrize(
|
||||
"document_id, expected_num, expected_message",
|
||||
[
|
||||
(None, 5, ""),
|
||||
("", 5, ""),
|
||||
(lambda docs: docs[0].id, 1, ""),
|
||||
("unknown.txt", 0, "You don't own the document unknown.txt"),
|
||||
],
|
||||
)
|
||||
def test_id(self, add_documents, document_id, expected_num, expected_message):
|
||||
dataset, documents = add_documents
|
||||
if callable(document_id):
|
||||
params = {"id": document_id(documents)}
|
||||
else:
|
||||
params = {"id": document_id}
|
||||
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_num, str(documents)
|
||||
if params["id"] not in [None, ""]:
|
||||
assert documents[0].id == params["id"], str(documents)
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"document_id, name, expected_num, expected_message",
|
||||
[
|
||||
(lambda docs: docs[0].id, "ragflow_test_upload_0.txt", 1, ""),
|
||||
(lambda docs: docs[0].id, "ragflow_test_upload_1.txt", 0, ""),
|
||||
(lambda docs: docs[0].id, "unknown", 0, "You don't own the document unknown"),
|
||||
("invalid_id", "ragflow_test_upload_0.txt", 0, "You don't own the document invalid_id"),
|
||||
],
|
||||
)
|
||||
def test_name_and_id(self, add_documents, document_id, name, expected_num, expected_message):
|
||||
dataset, documents = add_documents
|
||||
params = {"id": document_id(documents) if callable(document_id) else document_id, "name": name}
|
||||
|
||||
if expected_message:
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
dataset.list_documents(**params)
|
||||
assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
else:
|
||||
documents = dataset.list_documents(**params)
|
||||
assert len(documents) == expected_num, str(documents)
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_list(self, add_documents):
|
||||
dataset, _ = add_documents
|
||||
count = 100
|
||||
|
||||
def list_docs():
|
||||
return dataset.list_documents()
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(list_docs) for _ in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
for future in futures:
|
||||
docs = future.result()
|
||||
assert len(docs) == 5, str(docs)
|
||||
|
||||
@pytest.mark.p3
def test_invalid_params(self, add_documents):
    """An unknown keyword argument to list_documents raises TypeError."""
    dataset, _ = add_documents
    bogus_kwargs = {"a": "b"}
    with pytest.raises(TypeError) as excinfo:
        dataset.list_documents(**bogus_kwargs)
    assert "got an unexpected keyword argument" in str(excinfo.value), str(excinfo.value)
|
||||
@ -0,0 +1,145 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents
|
||||
from ragflow_sdk import DataSet
|
||||
from utils import wait_for
|
||||
|
||||
|
||||
@wait_for(30, 1, "Document parsing timeout")
def condition(_dataset: DataSet, _document_ids=None):
    """Poll (via @wait_for) until parsing has finished.

    With ``_document_ids=None`` every document in the dataset must report
    ``run == "DONE"``.  Otherwise only the listed ids are checked.

    Fix: the previous version returned True vacuously when none of the
    target ids appeared in the listing (wrong/deleted ids), so the wait
    "succeeded" without anything being parsed.  Now every target id must be
    both present in the listing and DONE.
    """
    documents = _dataset.list_documents(page_size=1000)

    if _document_ids is None:
        return all(document.run == "DONE" for document in documents)

    target_ids = set(_document_ids)
    run_by_id = {document.id: document.run for document in documents if document.id in target_ids}
    # Every target must have been seen, and every seen target must be DONE.
    return target_ids <= run_by_id.keys() and all(run == "DONE" for run in run_by_id.values())
|
||||
|
||||
|
||||
def validate_document_details(dataset, document_ids):
    """Assert that every document of *dataset* whose id is in *document_ids*
    finished parsing: run state DONE, begin timestamp set, positive duration
    and progress, and a "Task done" progress message.

    NOTE: ``process_duation`` mirrors the (misspelled) SDK attribute name.
    Documents outside *document_ids* are ignored.
    """
    wanted = set(document_ids)
    for doc in dataset.list_documents(page_size=1000):
        if doc.id not in wanted:
            continue
        assert doc.run == "DONE"
        assert len(doc.process_begin_at) > 0
        assert doc.process_duation > 0
        assert doc.progress > 0
        assert "Task done" in doc.progress_msg
|
||||
|
||||
|
||||
class TestDocumentsParse:
    """Tests for DataSet.async_parse_documents: invalid payloads must raise,
    valid ids must eventually reach the DONE state (checked via the module's
    `condition` poller and `validate_document_details`)."""

    @pytest.mark.parametrize(
        "payload, expected_message",
        [
            pytest.param(None, "AttributeError", marks=pytest.mark.skip),
            pytest.param({"document_ids": []}, "`document_ids` is required", marks=pytest.mark.p1),
            pytest.param({"document_ids": ["invalid_id"]}, "Documents not found: ['invalid_id']", marks=pytest.mark.p3),
            pytest.param({"document_ids": ["\n!?。;!?\"'"]}, "Documents not found: ['\\n!?。;!?\"\\'']", marks=pytest.mark.p3),
            pytest.param("not json", "AttributeError", marks=pytest.mark.skip),
            pytest.param(lambda r: {"document_ids": r[:1]}, "", marks=pytest.mark.p1),
            pytest.param(lambda r: {"document_ids": r}, "", marks=pytest.mark.p1),
        ],
    )
    def test_basic_scenarios(self, add_documents_func, payload, expected_message):
        """Empty/unknown ids raise; real ids parse through to DONE."""
        dataset, documents = add_documents_func
        # Callable payloads build the request from the real uploaded ids.
        if callable(payload):
            payload = payload([doc.id for doc in documents])

        if expected_message:
            with pytest.raises(Exception) as excinfo:
                dataset.async_parse_documents(**payload)
            assert expected_message in str(excinfo.value), str(excinfo.value)
        else:
            dataset.async_parse_documents(**payload)
            # Block until the targeted documents report DONE, then verify details.
            condition(dataset, payload["document_ids"])
            validate_document_details(dataset, payload["document_ids"])

    @pytest.mark.parametrize(
        "payload",
        [
            pytest.param(lambda r: {"document_ids": ["invalid_id"] + r}, marks=pytest.mark.p3),
            pytest.param(lambda r: {"document_ids": r[:1] + ["invalid_id"] + r[1:3]}, marks=pytest.mark.p1),
            pytest.param(lambda r: {"document_ids": r + ["invalid_id"]}, marks=pytest.mark.p3),
        ],
    )
    def test_parse_partial_invalid_document_id(self, add_documents_func, payload):
        """A mix of valid and invalid ids raises on the invalid one, yet the
        valid documents are still parsed (server processes the rest)."""
        dataset, documents = add_documents_func
        document_ids = [doc.id for doc in documents]
        payload = payload(document_ids)

        with pytest.raises(Exception) as excinfo:
            dataset.async_parse_documents(**payload)
        assert "Documents not found: ['invalid_id']" in str(excinfo.value), str(excinfo.value)

        # The valid ids should still reach DONE despite the error above.
        condition(dataset, document_ids)
        validate_document_details(dataset, document_ids)

    @pytest.mark.p3
    def test_repeated_parse(self, add_documents_func):
        """Re-parsing already-parsed documents must not raise."""
        dataset, documents = add_documents_func
        document_ids = [doc.id for doc in documents]
        dataset.async_parse_documents(document_ids=document_ids)
        condition(dataset, document_ids)
        dataset.async_parse_documents(document_ids=document_ids)

    @pytest.mark.p3
    def test_duplicate_parse(self, add_documents_func):
        """Duplicate ids in one request are tolerated and parsing completes."""
        dataset, documents = add_documents_func
        document_ids = [doc.id for doc in documents]
        dataset.async_parse_documents(document_ids=document_ids + document_ids)
        condition(dataset, document_ids)
        validate_document_details(dataset, document_ids)
|
||||
|
||||
|
||||
@pytest.mark.p3
def test_parse_100_files(add_dataset_func, tmp_path):
    """Upload 100 documents, parse them in one request and verify completion."""
    dataset = add_dataset_func
    uploaded = bulk_upload_documents(dataset, 100, tmp_path)
    ids = [document.id for document in uploaded]

    dataset.async_parse_documents(document_ids=ids)
    condition(dataset, ids)
    validate_document_details(dataset, ids)
|
||||
|
||||
|
||||
@pytest.mark.p3
def test_concurrent_parse(add_dataset_func, tmp_path):
    """Issue one async_parse_documents call per document from 5 worker
    threads; all 100 requests succeed and every document reaches DONE."""
    total = 100
    dataset = add_dataset_func
    uploaded = bulk_upload_documents(dataset, total, tmp_path)
    ids = [document.id for document in uploaded]

    def submit_parse(one_id):
        dataset.async_parse_documents(document_ids=[one_id])

    with ThreadPoolExecutor(max_workers=5) as pool:
        tasks = [pool.submit(submit_parse, document.id) for document in uploaded]

    finished = list(as_completed(tasks))
    assert len(finished) == total, finished

    condition(dataset, ids)
    validate_document_details(dataset, ids)
|
||||
@ -0,0 +1,41 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def validate_document_parse_done(dataset, document_ids):
    """Assert every document of *dataset* listed in *document_ids* finished
    parsing successfully (DONE, timestamps/progress set, "Task done" message).

    NOTE: ``process_duation`` mirrors the (misspelled) SDK attribute name.
    Documents not in *document_ids* are skipped.
    """
    wanted = set(document_ids)
    for doc in dataset.list_documents(page_size=1000):
        if doc.id not in wanted:
            continue
        assert doc.run == "DONE"
        assert len(doc.process_begin_at) > 0
        assert doc.process_duation > 0
        assert doc.progress > 0
        assert "Task done" in doc.progress_msg
|
||||
|
||||
|
||||
def validate_document_parse_cancel(dataset, document_ids):
    """Assert every document of *dataset* listed in *document_ids* was
    cancelled: run state CANCEL, begin timestamp set, progress reset to 0.

    Fix: *document_ids* was previously ignored, so the assertions ran against
    EVERY document in the dataset and any unrelated (e.g. finished) document
    failed the check.  Now only the targeted documents are validated,
    mirroring validate_document_parse_done.
    """
    target_ids = set(document_ids)
    for document in dataset.list_documents(page_size=1000):
        if document.id not in target_ids:
            continue
        assert document.run == "CANCEL"
        assert len(document.process_begin_at) > 0
        assert document.progress == 0.0
|
||||
|
||||
|
||||
@pytest.mark.skip
class TestDocumentsParseStop:
    # Placeholder: parse-cancellation tests are not implemented yet; the
    # class is skipped wholesale.  The validate_document_parse_done/cancel
    # helpers defined above are intended for use here once written.
    pass
|
||||
@ -0,0 +1,411 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import pytest
|
||||
from configs import DOCUMENT_NAME_LIMIT
|
||||
from ragflow_sdk import DataSet
|
||||
|
||||
|
||||
class TestDocumentsUpdated:
    """Tests for Document.update: renames, meta_fields, chunk_method changes,
    and rejection of read-only / unknown fields."""

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, expected_message",
        [
            ("new_name.txt", ""),
            (f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", "The name should be less than 128 bytes"),
            (0, "AttributeError"),
            (None, "AttributeError"),
            ("", "The extension of file can't be changed"),
            ("ragflow_test_upload_0", "The extension of file can't be changed"),
            ("ragflow_test_upload_1.txt", "Duplicated document name in the same dataset"),
            ("RAGFLOW_TEST_UPLOAD_1.TXT", ""),
        ],
    )
    def test_name(self, add_documents, name, expected_message):
        """Renaming: valid names persist; over-long, non-string, extensionless
        or duplicated names raise with the expected message.  Note the
        uppercase variant is accepted (name comparison is case-sensitive)."""
        dataset, documents = add_documents
        document = documents[0]

        if expected_message:
            with pytest.raises(Exception) as excinfo:
                document.update({"name": name})
            assert expected_message in str(excinfo.value), str(excinfo.value)
        else:
            document.update({"name": name})
            # Re-fetch to confirm the rename was persisted server-side.
            updated_doc = dataset.list_documents(id=document.id)[0]
            assert updated_doc.name == name, str(updated_doc)

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "meta_fields, expected_message",
        [
            ({"test": "test"}, ""),
            ("test", "meta_fields must be a dictionary"),
        ],
    )
    def test_meta_fields(self, add_documents, meta_fields, expected_message):
        """meta_fields must be a dict; any other type is rejected."""
        _, documents = add_documents
        document = documents[0]

        if expected_message:
            with pytest.raises(Exception) as excinfo:
                document.update({"meta_fields": meta_fields})
            assert expected_message in str(excinfo.value), str(excinfo.value)
        else:
            document.update({"meta_fields": meta_fields})

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "chunk_method, expected_message",
        [
            ("naive", ""),
            ("manual", ""),
            ("qa", ""),
            ("table", ""),
            ("paper", ""),
            ("book", ""),
            ("laws", ""),
            ("presentation", ""),
            ("picture", ""),
            ("one", ""),
            ("knowledge_graph", ""),
            ("email", ""),
            ("tag", ""),
            ("", "`chunk_method` doesn't exist"),
            ("other_chunk_method", "`chunk_method` other_chunk_method doesn't exist"),
        ],
    )
    def test_chunk_method(self, add_documents, chunk_method, expected_message):
        """Every supported chunk_method is accepted and persisted; empty or
        unknown methods raise."""
        dataset, documents = add_documents
        document = documents[0]

        if expected_message:
            with pytest.raises(Exception) as excinfo:
                document.update({"chunk_method": chunk_method})
            assert expected_message in str(excinfo.value), str(excinfo.value)
        else:
            document.update({"chunk_method": chunk_method})
            updated_doc = dataset.list_documents(id=document.id)[0]
            assert updated_doc.chunk_method == chunk_method, str(updated_doc)

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_message",
        [
            # Read-only counters reject with a specific message; most other
            # server-managed fields are skipped pending issues/6104.
            ({"chunk_count": 1}, "Can't change `chunk_count`"),
            pytest.param(
                {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"create_time": 1},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"created_by": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"dataset_id": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"id": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"location": "ragflow_test.txt"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"process_begin_at": 1},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"process_duation": 1.0},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            ({"progress": 1.0}, "Can't change `progress`"),
            pytest.param(
                {"progress_msg": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"run": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"size": 1},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"source_type": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"thumbnail": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            ({"token_count": 1}, "Can't change `token_count`"),
            pytest.param(
                {"type": "ragflow_test"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"update_time": 1},
                "The input parameters are invalid",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
        ],
    )
    def test_invalid_field(self, add_documents, payload, expected_message):
        """Updating any server-managed field must raise."""
        _, documents = add_documents
        document = documents[0]

        with pytest.raises(Exception) as excinfo:
            document.update(payload)
        assert expected_message in str(excinfo.value), str(excinfo.value)
|
||||
|
||||
|
||||
class TestUpdateDocumentParserConfig:
    """Tests for updating a document's parser_config alongside chunk_method.

    Many boundary cases are currently skipped pending issues/6098 (server-side
    validation of parser_config values)."""

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "chunk_method, parser_config, expected_message",
        [
            ("naive", {}, ""),
            (
                "naive",
                {
                    "chunk_token_num": 128,
                    "layout_recognize": "DeepDOC",
                    "html4excel": False,
                    "delimiter": r"\n",
                    "task_page_size": 12,
                    "raptor": {"use_raptor": False},
                },
                "",
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": -1},
                "chunk_token_num should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 0},
                "chunk_token_num should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 100000000},
                "chunk_token_num should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 3.14},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": "1024"},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            ("naive", {"layout_recognize": "DeepDOC"}, ""),
            ("naive", {"layout_recognize": "Naive"}, ""),
            ("naive", {"html4excel": True}, ""),
            ("naive", {"html4excel": False}, ""),
            pytest.param(
                "naive",
                {"html4excel": 1},
                "html4excel should be True or False",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            ("naive", {"delimiter": ""}, ""),
            ("naive", {"delimiter": "`##`"}, ""),
            pytest.param(
                "naive",
                {"delimiter": 1},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": -1},
                "task_page_size should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 0},
                "task_page_size should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 100000000},
                "task_page_size should be in range from 1 to 100000000",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 3.14},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": "1024"},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            ("naive", {"raptor": {"use_raptor": True}}, ""),
            ("naive", {"raptor": {"use_raptor": False}}, ""),
            pytest.param(
                "naive",
                {"invalid_key": "invalid_value"},
                "Abnormal 'parser_config'. Invalid key: invalid_key",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": -1},
                "auto_keywords should be in range from 0 to 32",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": 32},
                "auto_keywords should be in range from 0 to 32",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": 3.14},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": "1024"},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": -1},
                "auto_questions should be in range from 0 to 10",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 10},
                "auto_questions should be in range from 0 to 10",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 3.14},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": "1024"},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": -1},
                "topn_tags should be in range from 0 to 10",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 10},
                "topn_tags should be in range from 0 to 10",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 3.14},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": "1024"},
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
        ],
    )
    def test_parser_config(self, client, add_documents, chunk_method, parser_config, expected_message):
        """Valid parser_config values persist field-by-field; an empty config
        falls back to the server defaults (compared via ParserConfig repr)."""
        dataset, documents = add_documents
        document = documents[0]
        from operator import attrgetter

        update_data = {"chunk_method": chunk_method, "parser_config": parser_config}

        if expected_message:
            with pytest.raises(Exception) as excinfo:
                document.update(update_data)
            assert expected_message in str(excinfo.value), str(excinfo.value)
        else:
            document.update(update_data)
            updated_doc = dataset.list_documents(id=document.id)[0]
            if parser_config:
                # Compare each submitted key (one nesting level deep) against
                # the persisted parser_config attribute object.
                for k, v in parser_config.items():
                    if isinstance(v, dict):
                        for kk, vv in v.items():
                            assert attrgetter(f"{k}.{kk}")(updated_doc.parser_config) == vv, str(updated_doc)
                    else:
                        assert attrgetter(k)(updated_doc.parser_config) == v, str(updated_doc)
            else:
                # Empty config: the server applies these defaults.
                expected_config = DataSet.ParserConfig(
                    client,
                    {
                        "chunk_token_num": 128,
                        "delimiter": r"\n",
                        "html4excel": False,
                        "layout_recognize": "DeepDOC",
                        "raptor": {"use_raptor": False},
                    },
                )
                assert str(updated_doc.parser_config) == str(expected_config), str(updated_doc)
|
||||
@ -0,0 +1,210 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import string
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from configs import DOCUMENT_NAME_LIMIT
|
||||
from utils.file_utils import create_txt_file
|
||||
|
||||
|
||||
class TestDocumentsUpload:
    """Tests for DataSet.upload_documents: supported types, size/name limits,
    duplicates, special characters, bulk and concurrent uploads."""

    @pytest.mark.p1
    def test_valid_single_upload(self, add_dataset_func, tmp_path):
        """A single txt file uploads and is attributed to the dataset."""
        dataset = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        with fp.open("rb") as f:
            blob = f.read()

        documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        for document in documents:
            assert document.dataset_id == dataset.id, str(document)
            assert document.name == fp.name, str(document)

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "generate_test_files",
        [
            "docx",
            "excel",
            "ppt",
            "image",
            "pdf",
            "txt",
            "md",
            "json",
            "eml",
            "html",
        ],
        indirect=True,
    )
    def test_file_type_validation(self, add_dataset_func, generate_test_files, request):
        """Each supported file type uploads successfully.  The indirect
        fixture returns a mapping keyed by the type name."""
        dataset = add_dataset_func
        fp = generate_test_files[request.node.callspec.params["generate_test_files"]]

        with fp.open("rb") as f:
            blob = f.read()

        documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        for document in documents:
            assert document.dataset_id == dataset.id, str(document)
            assert document.name == fp.name, str(document)

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "file_type",
        ["exe", "unknown"],
    )
    def test_unsupported_file_type(self, add_dataset_func, tmp_path, file_type):
        """Unsupported extensions are rejected with an exact error message."""
        dataset = add_dataset_func
        fp = tmp_path / f"ragflow_test.{file_type}"
        fp.touch()

        with fp.open("rb") as f:
            blob = f.read()

        with pytest.raises(Exception) as excinfo:
            dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        assert str(excinfo.value) == f"ragflow_test.{file_type}: This type of file has not been supported yet!", str(excinfo.value)

    @pytest.mark.p2
    def test_missing_file(self, add_dataset_func):
        """An empty upload list is rejected."""
        dataset = add_dataset_func
        with pytest.raises(Exception) as excinfo:
            dataset.upload_documents([])
        assert str(excinfo.value) == "No file part!", str(excinfo.value)

    @pytest.mark.p3
    def test_empty_file(self, add_dataset_func, tmp_path):
        """A zero-byte file uploads and reports size 0."""
        dataset = add_dataset_func
        fp = tmp_path / "empty.txt"
        fp.touch()

        with fp.open("rb") as f:
            blob = f.read()

        documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        for document in documents:
            assert document.size == 0, str(document)

    @pytest.mark.p3
    def test_filename_empty(self, add_dataset_func, tmp_path):
        """An empty display_name is rejected."""
        dataset = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")

        with fp.open("rb") as f:
            blob = f.read()

        with pytest.raises(Exception) as excinfo:
            dataset.upload_documents([{"display_name": "", "blob": blob}])
        assert str(excinfo.value) == "No file selected!", str(excinfo.value)

    @pytest.mark.p2
    def test_filename_exceeds_max_length(self, add_dataset_func, tmp_path):
        """Names at/over DOCUMENT_NAME_LIMIT bytes are rejected.  The stem is
        LIMIT-3 'a's so that stem + '.txt' crosses the limit."""
        dataset = add_dataset_func
        fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")

        with fp.open("rb") as f:
            blob = f.read()

        with pytest.raises(Exception) as excinfo:
            dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        assert str(excinfo.value) == "File name should be less than 128 bytes.", str(excinfo.value)

    @pytest.mark.p2
    def test_duplicate_files(self, add_dataset_func, tmp_path):
        """Two entries with the same name in one request are both accepted;
        the second is de-duplicated to 'stem(1).suffix'."""
        dataset = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")

        with fp.open("rb") as f:
            blob = f.read()

        documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}, {"display_name": fp.name, "blob": blob}])

        assert len(documents) == 2, str(documents)
        for i, document in enumerate(documents):
            assert document.dataset_id == dataset.id, str(document)
            expected_name = fp.name if i == 0 else f"{fp.stem}({i}){fp.suffix}"
            assert document.name == expected_name, str(document)

    @pytest.mark.p2
    def test_same_file_repeat(self, add_dataset_func, tmp_path):
        """Re-uploading the same file in separate requests de-duplicates the
        name with an incrementing '(i)' suffix."""
        dataset = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")

        with fp.open("rb") as f:
            blob = f.read()

        for i in range(3):
            documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
            assert len(documents) == 1, str(documents)
            document = documents[0]
            assert document.dataset_id == dataset.id, str(document)
            expected_name = fp.name if i == 0 else f"{fp.stem}({i}){fp.suffix}"
            assert document.name == expected_name, str(document)

    @pytest.mark.p3
    def test_filename_special_characters(self, add_dataset_func, tmp_path):
        """Punctuation-heavy names upload unchanged.  Characters illegal in
        filesystem paths are mapped to '_' before creating the local file."""
        dataset = add_dataset_func
        illegal_chars = '<>:"/\\|?*'
        translation_table = str.maketrans({char: "_" for char in illegal_chars})
        safe_filename = string.punctuation.translate(translation_table)
        fp = tmp_path / f"{safe_filename}.txt"
        fp.write_text("Sample text content")

        with fp.open("rb") as f:
            blob = f.read()

        documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
        assert len(documents) == 1, str(documents)
        document = documents[0]
        assert document.dataset_id == dataset.id, str(document)
        assert document.name == fp.name, str(document)

    @pytest.mark.p1
    def test_multiple_files(self, client, add_dataset_func, tmp_path):
        """One request carrying 20 files creates 20 documents and the
        dataset's document_count reflects it."""
        dataset = add_dataset_func
        expected_document_count = 20
        document_infos = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_upload_{i}.txt")
            with fp.open("rb") as f:
                blob = f.read()
            document_infos.append({"display_name": fp.name, "blob": blob})
        documents = dataset.upload_documents(document_infos)
        assert len(documents) == expected_document_count, str(documents)

        retrieved_dataset = client.get_dataset(name=dataset.name)
        assert retrieved_dataset.document_count == expected_document_count, str(retrieved_dataset)

    @pytest.mark.p3
    def test_concurrent_upload(self, client, add_dataset_func, tmp_path):
        """20 single-file uploads from 5 worker threads all succeed and the
        dataset's document_count matches."""
        dataset = add_dataset_func
        count = 20
        fps = [create_txt_file(tmp_path / f"ragflow_test_{i}.txt") for i in range(count)]

        def upload_file(fp):
            with fp.open("rb") as f:
                blob = f.read()
            return dataset.upload_documents([{"display_name": fp.name, "blob": blob}])

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(upload_file, fp) for fp in fps]
        responses = list(as_completed(futures))
        assert len(responses) == count, responses

        retrieved_dataset = client.get_dataset(name=dataset.name)
        assert retrieved_dataset.document_count == count, str(retrieved_dataset)
|
||||
Reference in New Issue
Block a user