Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 20:42:30 +08:00
TEST: Added test cases for Upload Documents HTTP API (#5991)
### What problem does this PR solve?

Covers the Upload Documents HTTP API endpoints with test cases.

### Type of change

- [x] Add test cases
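For orientation, here is a minimal sketch of the call this suite exercises: a multipart POST to `/api/v1/datasets/{dataset_id}/documents`. The dataset ID and API key are placeholders, and the `Bearer` scheme is an assumption based on the suite's `RAGFlowHttpApiAuth` helper; the URL template matches `FILE_API_URL` in the test helpers below.

```python
# Hypothetical direct call to the endpoint under test (not part of the commit).
import requests

HOST_ADDRESS = "http://127.0.0.1:9380"  # default used by the tests
dataset_id = "<dataset_id>"             # placeholder
url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}/documents"

with open("ragflow_test.txt", "rb") as f:
    res = requests.post(
        url,
        headers={"Authorization": "Bearer <api_key>"},  # assumed auth scheme
        files={"file": ("ragflow_test.txt", f)},        # multipart "file" part
    )
# Expected shape on success: {"code": 0, "data": [{"dataset_id": ..., "name": ...}]}
print(res.json())
```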
@ -1,29 +0,0 @@
<svg width="32" height="34" viewBox="0 0 32 34" fill="none" xmlns="http://www.w3.org/2000/svg">
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M3.43265 20.7677C4.15835 21.5062 4.15834 22.7035 3.43262 23.4419L3.39546 23.4797C2.66974 24.2182 1.49312 24.2182 0.767417 23.4797C0.0417107 22.7412 0.0417219 21.544 0.767442 20.8055L0.804608 20.7677C1.53033 20.0292 2.70694 20.0293 3.43265 20.7677Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M12.1689 21.3375C12.8933 22.0773 12.8912 23.2746 12.1641 24.0117L7.01662 29.2307C6.2896 29.9678 5.11299 29.9657 4.38859 29.2259C3.66419 28.4861 3.66632 27.2888 4.39334 26.5517L9.54085 21.3327C10.2679 20.5956 11.4445 20.5977 12.1689 21.3375Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M19.1551 30.3217C19.7244 29.4528 20.8781 29.218 21.7321 29.7973L21.8436 29.8729C22.6975 30.4522 22.9283 31.6262 22.359 32.4952C21.7897 33.3641 20.6359 33.5989 19.782 33.0196L19.6705 32.944C18.8165 32.3647 18.5858 31.1907 19.1551 30.3217Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4184 20.6544C32.1441 21.3929 32.1441 22.5902 31.4184 23.3286L28.8911 25.9003C28.1654 26.6388 26.9887 26.6388 26.263 25.9003C25.5373 25.1619 25.5373 23.9646 26.263 23.2261L28.7903 20.6544C29.516 19.916 30.6927 19.916 31.4184 20.6544Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4557 11.1427C32.1814 11.8812 32.1814 13.0785 31.4557 13.8169L12.7797 32.8209C12.054 33.5594 10.8774 33.5594 10.1517 32.8209C9.42599 32.0825 9.42599 30.8852 10.1517 30.1467L28.8277 11.1427C29.5534 10.4043 30.73 10.4043 31.4557 11.1427Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M27.925 5.29994C28.6508 6.0384 28.6508 7.23568 27.925 7.97414L17.184 18.9038C16.4583 19.6423 15.2817 19.6423 14.556 18.9038C13.8303 18.1653 13.8303 16.9681 14.556 16.2296L25.297 5.29994C26.0227 4.56148 27.1993 4.56148 27.925 5.29994Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M22.256 1.59299C22.9822 2.33095 22.983 3.52823 22.2578 4.26718L8.45055 18.3358C7.72533 19.0748 6.54871 19.0756 5.82251 18.3376C5.09631 17.5996 5.09552 16.4024 5.82075 15.6634L19.6279 1.59478C20.3532 0.855827 21.5298 0.855022 22.256 1.59299Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M8.58225 6.09619C9.30671 6.83592 9.30469 8.0332 8.57772 8.77038L3.17006 14.2541C2.4431 14.9913 1.26649 14.9893 0.542025 14.2495C-0.182438 13.5098 -0.180413 12.3125 0.546548 11.5753L5.95421 6.09159C6.68117 5.3544 7.85778 5.35646 8.58225 6.09619Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M11.893 0.624023C12.9193 0.624023 13.7513 1.47063 13.7513 2.51497V2.70406C13.7513 3.7484 12.9193 4.59501 11.893 4.59501C10.8667 4.59501 10.0347 3.7484 10.0347 2.70406V2.51497C10.0347 1.47063 10.8667 0.624023 11.893 0.624023Z"
        fill="#B2DDFF" />
</svg>

Before Width: | Height: | Size: 3.0 KiB
@ -13,12 +13,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest
from common import delete_dataset


@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    yield
    delete_dataset(get_http_api_auth)
25  sdk/python/test/libs/utils/__init__.py  Normal file
@ -0,0 +1,25 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import base64
from pathlib import Path


def encode_avatar(image_path):
    with Path.open(image_path, "rb") as file:
        binary_data = file.read()
    base64_encoded = base64.b64encode(binary_data).decode("utf-8")
    return base64_encoded
107  sdk/python/test/libs/utils/file_utils.py  Normal file
@ -0,0 +1,107 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json

from docx import Document  # pip install python-docx
from openpyxl import Workbook  # pip install openpyxl
from PIL import Image, ImageDraw  # pip install Pillow
from pptx import Presentation  # pip install python-pptx
from reportlab.pdfgen import canvas  # pip install reportlab


def create_docx_file(path):
    doc = Document()
    doc.add_paragraph("This is a test DOCX file.")
    doc.save(path)
    return path


def create_excel_file(path):
    wb = Workbook()
    ws = wb.active
    ws["A1"] = "Test Excel file"
    wb.save(path)
    return path


def create_ppt_file(path):
    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = "Test PPT file"
    prs.save(path)
    return path


def create_image_file(path):
    img = Image.new("RGB", (100, 100), color="blue")
    draw = ImageDraw.Draw(img)
    draw.text((10, 40), "Test", fill="white")
    img.save(path)
    return path


def create_pdf_file(path):
    # reportlab's Canvas expects a string path, not a pathlib.Path
    if not isinstance(path, str):
        path = str(path)
    c = canvas.Canvas(path)
    c.drawString(100, 750, "Test PDF file")
    c.save()
    return path


def create_txt_file(path):
    with open(path, "w", encoding="utf-8") as f:
        f.write("This is the content of a test TXT file.")
    return path


def create_md_file(path):
    md_content = "# Test MD file\n\nThis is a test file in Markdown format."
    with open(path, "w", encoding="utf-8") as f:
        f.write(md_content)
    return path


def create_json_file(path):
    data = {"message": "This is a test JSON file", "value": 123}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    return path


def create_eml_file(path):
    eml_content = (
        "From: sender@example.com\n"
        "To: receiver@example.com\n"
        "Subject: Test EML file\n\n"
        "This is the content of a test email.\n"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(eml_content)
    return path


def create_html_file(path):
    html_content = (
        "<html>\n"
        "<head><title>Test HTML file</title></head>\n"
        "<body><h1>This is a test HTML file</h1></body>\n"
        "</html>"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_content)
    return path
101  sdk/python/test/test_http_api/common.py  Normal file
@ -0,0 +1,101 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
from pathlib import Path

import requests
from requests_toolbelt import MultipartEncoder

HEADERS = {"Content-Type": "application/json"}
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
DATASETS_API_URL = "/api/v1/datasets"
FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"

INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128
DOCUMENT_NAME_LIMIT = 128


# DATASET MANAGEMENT
def create_dataset(auth, payload):
    res = requests.post(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def list_dataset(auth, params=None):
    res = requests.get(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        params=params,
    )
    return res.json()


def update_dataset(auth, dataset_id, payload):
    res = requests.put(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def delete_dataset(auth, payload=None):
    res = requests.delete(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def create_datasets(auth, num):
    ids = []
    for i in range(num):
        res = create_dataset(auth, {"name": f"dataset_{i}"})
        ids.append(res["data"]["id"])
    return ids


# FILE MANAGEMENT WITHIN DATASET
def upload_documents(auth, dataset_id, files_path=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)

    if files_path is None:
        files_path = []

    # Build one multipart "file" part per path; MultipartEncoder streams
    # the bodies instead of loading every file into memory.
    fields = []
    for fp in files_path:
        p = Path(fp)
        fields.append(("file", (p.name, p.open("rb"))))
    m = MultipartEncoder(fields=fields)

    res = requests.post(
        url=url,
        headers={"Content-Type": m.content_type},
        auth=auth,
        data=m,
    )
    return res.json()
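Taken together, these helpers compose into the pattern the tests repeat. A minimal standalone sketch, assuming a reachable RAGFlow instance and a valid API key; the key is a placeholder, and in the suite the auth object is supplied by the `get_http_api_auth` fixture rather than constructed by hand:

```python
# Hypothetical standalone use of common.py's helpers (not part of the commit).
from common import create_datasets, delete_dataset, upload_documents
from libs.auth import RAGFlowHttpApiAuth

auth = RAGFlowHttpApiAuth("<api_key>")                # placeholder key
ids = create_datasets(auth, 1)                        # one fresh dataset
res = upload_documents(auth, ids[0], ["sample.txt"])  # sample.txt: any local file
assert res["code"] == 0
delete_dataset(auth)                                  # cleanup, mirroring clear_datasets
```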
73  sdk/python/test/test_http_api/conftest.py  Normal file
@ -0,0 +1,73 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import delete_dataset
from libs.utils.file_utils import (
    create_docx_file,
    create_eml_file,
    create_excel_file,
    create_html_file,
    create_image_file,
    create_json_file,
    create_md_file,
    create_pdf_file,
    create_ppt_file,
    create_txt_file,
)


@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    yield
    delete_dataset(get_http_api_auth)


@pytest.fixture
def generate_test_files(tmp_path):
    files = {}
    files["docx"] = tmp_path / "ragflow_test.docx"
    create_docx_file(files["docx"])

    files["excel"] = tmp_path / "ragflow_test.xlsx"
    create_excel_file(files["excel"])

    files["ppt"] = tmp_path / "ragflow_test.pptx"
    create_ppt_file(files["ppt"])

    files["image"] = tmp_path / "ragflow_test.png"
    create_image_file(files["image"])

    files["pdf"] = tmp_path / "ragflow_test.pdf"
    create_pdf_file(files["pdf"])

    files["txt"] = tmp_path / "ragflow_test.txt"
    create_txt_file(files["txt"])

    files["md"] = tmp_path / "ragflow_test.md"
    create_md_file(files["md"])

    files["json"] = tmp_path / "ragflow_test.json"
    create_json_file(files["json"])

    files["eml"] = tmp_path / "ragflow_test.eml"
    create_eml_file(files["eml"])

    files["html"] = tmp_path / "ragflow_test.html"
    create_html_file(files["html"])

    return files
@ -1,57 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import requests

HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
API_URL = f"{HOST_ADDRESS}/api/v1/datasets"
HEADERS = {"Content-Type": "application/json"}

INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128


def create_dataset(auth, payload):
    res = requests.post(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_dataset(auth, params=None):
    res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_dataset(auth, dataset_id, payload):
    res = requests.put(
        url=f"{API_URL}/{dataset_id}", headers=HEADERS, auth=auth, json=payload
    )
    return res.json()


def delete_dataset(auth, payload=None):
    res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def create_datasets(auth, num):
    ids = []
    for i in range(num):
        res = create_dataset(auth, {"name": f"dataset_{i}"})
        ids.append(res["data"]["id"])
    return ids
@ -13,12 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from pathlib import Path

import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, create_dataset
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file


class TestAuthorization:
@ -75,18 +75,11 @@ class TestDatasetCreation:


class TestAdvancedConfigurations:
    def test_avatar(self, get_http_api_auth, request):
        def encode_avatar(image_path):
            with Path.open(image_path, "rb") as file:
                binary_data = file.read()
            base64_encoded = base64.b64encode(binary_data).decode("utf-8")
            return base64_encoded

    def test_avatar(self, get_http_api_auth, tmp_path):
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {
            "name": "avatar_test",
            "avatar": encode_avatar(
                Path(request.config.rootdir) / "test/data/logo.svg"
            ),
            "avatar": encode_avatar(fn),
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0

@ -13,9 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import pytest
from common import (
@ -26,6 +24,8 @@ from common import (
    update_dataset,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file

# TODO: Missing scenario for updating embedding_model with chunk_count != 0

@ -171,19 +171,10 @@ class TestDatasetUpdate:
        else:
            assert res["message"] == expected_message

    def test_avatar(self, get_http_api_auth, request):
        def encode_avatar(image_path):
            with Path.open(image_path, "rb") as file:
                binary_data = file.read()
            base64_encoded = base64.b64encode(binary_data).decode("utf-8")
            return base64_encoded

    def test_avatar(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        payload = {
            "avatar": encode_avatar(
                Path(request.config.rootdir) / "test/data/logo.svg"
            ),
        }
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {"avatar": encode_avatar(fn)}
        res = update_dataset(get_http_api_auth, ids[0], payload)
        assert res["code"] == 0

@ -0,0 +1,230 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import string
from concurrent.futures import ThreadPoolExecutor

import pytest
import requests
from common import (
    DOCUMENT_NAME_LIMIT,
    FILE_API_URL,
    HOST_ADDRESS,
    INVALID_API_TOKEN,
    create_datasets,
    list_dataset,
    upload_documents,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils.file_utils import create_txt_file
from requests_toolbelt import MultipartEncoder


class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(
        self, get_http_api_auth, auth, expected_code, expected_message
    ):
        ids = create_datasets(get_http_api_auth, 1)
        res = upload_documents(auth, ids[0])
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestUploadDocuments:
    def test_valid_single_upload(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.parametrize(
        "file_type",
        [
            "docx",
            "excel",
            "ppt",
            "image",
            "pdf",
            "txt",
            "md",
            "json",
            "eml",
            "html",
        ],
    )
    def test_file_type_validation(
        self, get_http_api_auth, generate_test_files, file_type
    ):
        ids = create_datasets(get_http_api_auth, 1)
        fp = generate_test_files[file_type]
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.parametrize(
        "file_type",
        ["exe", "unknown"],
    )
    def test_unsupported_file_type(self, get_http_api_auth, tmp_path, file_type):
        ids = create_datasets(get_http_api_auth, 1)
        fp = tmp_path / f"ragflow_test.{file_type}"
        fp.touch()
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 500
        assert (
            res["message"]
            == f"ragflow_test.{file_type}: This type of file has not been supported yet!"
        )

    def test_missing_file(self, get_http_api_auth):
        ids = create_datasets(get_http_api_auth, 1)
        res = upload_documents(get_http_api_auth, ids[0])
        assert res["code"] == 101
        assert res["message"] == "No file part!"

    def test_empty_file(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = tmp_path / "empty.txt"
        fp.touch()

        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["size"] == 0

    def test_filename_empty(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=ids[0])
        # Bypass the helper to send a part whose filename is empty
        fields = (("file", ("", fp.open("rb"))),)
        m = MultipartEncoder(fields=fields)
        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=get_http_api_auth,
            data=m,
        )
        assert res.json()["code"] == 101
        assert res.json()["message"] == "No file selected!"

    def test_filename_exceeds_max_length(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        # 125 "a"s plus ".txt" -> 129 characters, one over DOCUMENT_NAME_LIMIT (128)
        fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 500
        assert (
            res["message"]
            == f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt: Exceed the maximum length of file name!"
        )

    def test_invalid_dataset_id(self, get_http_api_auth, tmp_path):
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, "invalid_dataset_id", [fp])
        assert res["code"] == 100
        assert (
            res["message"]
            == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")"""
        )

    def test_duplicate_files(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp, fp])
        assert res["code"] == 0
        assert len(res["data"]) == 2
        for i in range(len(res["data"])):
            assert res["data"][i]["dataset_id"] == ids[0]
            # Duplicates are deduplicated by suffixing "(1)", "(2)", ...
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][i]["name"] == expected_name

    def test_same_file_repeat(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        for i in range(10):
            res = upload_documents(get_http_api_auth, ids[0], [fp])
            assert res["code"] == 0
            assert len(res["data"]) == 1
            assert res["data"][0]["dataset_id"] == ids[0]
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][0]["name"] == expected_name

    def test_filename_special_characters(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        # Replace filesystem-illegal characters so the local file can be created
        illegal_chars = '<>:"/\\|?*'
        translation_table = str.maketrans({char: "_" for char in illegal_chars})
        safe_filename = string.punctuation.translate(translation_table)
        fp = tmp_path / f"{safe_filename}.txt"
        fp.write_text("Sample text content")

        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert len(res["data"]) == 1
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    def test_multiple_files(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)
        res = upload_documents(get_http_api_auth, ids[0], fps)
        assert res["code"] == 0

        res = list_dataset(get_http_api_auth, {"id": ids[0]})
        assert res["data"][0]["document_count"] == expected_document_count

    @pytest.mark.xfail
    def test_concurrent_upload(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)

        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    upload_documents, get_http_api_auth, ids[0], fps[i : i + 1]
                )
                for i in range(expected_document_count)
            ]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

        res = list_dataset(get_http_api_auth, {"id": ids[0]})
        assert res["data"][0]["document_count"] == expected_document_count
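As a closing note, a hedged sketch of running just this suite; the `pytest.main` call is equivalent to invoking `pytest` on `sdk/python/test/test_http_api` with `HOST_ADDRESS` pointing at a running RAGFlow instance, and the `-k upload` filter relies on the upload tests' names, which is an assumption about how the suite is selected in practice:

```python
# Hypothetical runner for this suite (not part of the commit).
import os
import pytest

# Point the helpers at a running RAGFlow instance; default mirrors common.py.
os.environ.setdefault("HOST_ADDRESS", "http://127.0.0.1:9380")

# Select the upload tests by name; equivalent to `pytest -k upload`.
raise SystemExit(pytest.main(["sdk/python/test/test_http_api", "-k", "upload"]))
```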