TEST: Added test cases for Upload Documents HTTP API (#5991)

### What problem does this PR solve?

Cover the upload-documents endpoints with test cases.

### Type of change

- [x] add test cases
This commit is contained in:
liu an
2025-03-12 19:38:52 +08:00
committed by GitHub
parent 7cd37c37cd
commit bd5eb47441
12 changed files with 782 additions and 123 deletions

View File

@ -1,29 +0,0 @@
<svg width="32" height="34" viewBox="0 0 32 34" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd"
d="M3.43265 20.7677C4.15835 21.5062 4.15834 22.7035 3.43262 23.4419L3.39546 23.4797C2.66974 24.2182 1.49312 24.2182 0.767417 23.4797C0.0417107 22.7412 0.0417219 21.544 0.767442 20.8055L0.804608 20.7677C1.53033 20.0292 2.70694 20.0293 3.43265 20.7677Z"
fill="#B2DDFF" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M12.1689 21.3375C12.8933 22.0773 12.8912 23.2746 12.1641 24.0117L7.01662 29.2307C6.2896 29.9678 5.11299 29.9657 4.38859 29.2259C3.66419 28.4861 3.66632 27.2888 4.39334 26.5517L9.54085 21.3327C10.2679 20.5956 11.4445 20.5977 12.1689 21.3375Z"
fill="#53B1FD" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M19.1551 30.3217C19.7244 29.4528 20.8781 29.218 21.7321 29.7973L21.8436 29.8729C22.6975 30.4522 22.9283 31.6262 22.359 32.4952C21.7897 33.3641 20.6359 33.5989 19.782 33.0196L19.6705 32.944C18.8165 32.3647 18.5858 31.1907 19.1551 30.3217Z"
fill="#B2DDFF" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M31.4184 20.6544C32.1441 21.3929 32.1441 22.5902 31.4184 23.3286L28.8911 25.9003C28.1654 26.6388 26.9887 26.6388 26.263 25.9003C25.5373 25.1619 25.5373 23.9646 26.263 23.2261L28.7903 20.6544C29.516 19.916 30.6927 19.916 31.4184 20.6544Z"
fill="#53B1FD" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M31.4557 11.1427C32.1814 11.8812 32.1814 13.0785 31.4557 13.8169L12.7797 32.8209C12.054 33.5594 10.8774 33.5594 10.1517 32.8209C9.42599 32.0825 9.42599 30.8852 10.1517 30.1467L28.8277 11.1427C29.5534 10.4043 30.73 10.4043 31.4557 11.1427Z"
fill="#1570EF" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M27.925 5.29994C28.6508 6.0384 28.6508 7.23568 27.925 7.97414L17.184 18.9038C16.4583 19.6423 15.2817 19.6423 14.556 18.9038C13.8303 18.1653 13.8303 16.9681 14.556 16.2296L25.297 5.29994C26.0227 4.56148 27.1993 4.56148 27.925 5.29994Z"
fill="#1570EF" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M22.256 1.59299C22.9822 2.33095 22.983 3.52823 22.2578 4.26718L8.45055 18.3358C7.72533 19.0748 6.54871 19.0756 5.82251 18.3376C5.09631 17.5996 5.09552 16.4024 5.82075 15.6634L19.6279 1.59478C20.3532 0.855827 21.5298 0.855022 22.256 1.59299Z"
fill="#1570EF" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M8.58225 6.09619C9.30671 6.83592 9.30469 8.0332 8.57772 8.77038L3.17006 14.2541C2.4431 14.9913 1.26649 14.9893 0.542025 14.2495C-0.182438 13.5098 -0.180413 12.3125 0.546548 11.5753L5.95421 6.09159C6.68117 5.3544 7.85778 5.35646 8.58225 6.09619Z"
fill="#53B1FD" />
<path fill-rule="evenodd" clip-rule="evenodd"
d="M11.893 0.624023C12.9193 0.624023 13.7513 1.47063 13.7513 2.51497V2.70406C13.7513 3.7484 12.9193 4.59501 11.893 4.59501C10.8667 4.59501 10.0347 3.7484 10.0347 2.70406V2.51497C10.0347 1.47063 10.8667 0.624023 11.893 0.624023Z"
fill="#B2DDFF" />
</svg>

Before

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -13,12 +13,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import delete_dataset
@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    """Autouse teardown: after each test, call delete_dataset with no body
    (presumably removing all datasets for this account — verify against API)."""
    yield
    delete_dataset(get_http_api_auth)

View File

@ -0,0 +1,25 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from pathlib import Path
def encode_avatar(image_path):
    """Return the base64 encoding (UTF-8 str) of the file at *image_path*.

    Args:
        image_path: path to the image file; accepts either ``str`` or
            ``pathlib.Path``. (The previous version called the unbound
            ``Path.open(image_path, "rb")``, which crashed on plain strings.)

    Returns:
        The file's bytes, base64-encoded and decoded to an ASCII-safe str.
    """
    with Path(image_path).open("rb") as file:
        binary_data = file.read()
    return base64.b64encode(binary_data).decode("utf-8")

View File

@ -0,0 +1,107 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
from docx import Document # pip install python-docx
from openpyxl import Workbook # pip install openpyxl
from PIL import Image, ImageDraw # pip install Pillow
from pptx import Presentation # pip install python-pptx
from reportlab.pdfgen import canvas # pip install reportlab
def create_docx_file(path):
    """Write a minimal one-paragraph DOCX document at *path* and return it."""
    document = Document()
    document.add_paragraph("这是一个测试 DOCX 文件。")
    document.save(path)
    return path
def create_excel_file(path):
    """Write a minimal XLSX workbook (single cell A1) at *path* and return it."""
    workbook = Workbook()
    sheet = workbook.active
    sheet["A1"] = "测试 Excel 文件"
    workbook.save(path)
    return path
def create_ppt_file(path):
    """Write a minimal PPTX with a single title slide at *path* and return it."""
    deck = Presentation()
    title_layout = deck.slide_layouts[0]
    new_slide = deck.slides.add_slide(title_layout)
    new_slide.shapes.title.text = "测试 PPT 文件"
    deck.save(path)
    return path
def create_image_file(path):
    """Write a 100x100 blue PNG containing the word "Test" at *path*; return it."""
    picture = Image.new("RGB", (100, 100), color="blue")
    ImageDraw.Draw(picture).text((10, 40), "Test", fill="white")
    picture.save(path)
    return path
def create_pdf_file(path):
    """Write a single-page PDF with one line of text at *path* and return it.

    Note: the return value is the str-converted path, not the original object.
    """
    # reportlab's Canvas wants a plain string filename, so coerce Path objects.
    if not isinstance(path, str):
        path = str(path)
    pdf = canvas.Canvas(path)
    pdf.drawString(100, 750, "测试 PDF 文件")
    pdf.save()
    return path
def create_txt_file(path):
    """Write a small UTF-8 plain-text file at *path* and return it."""
    content = "这是测试 TXT 文件的内容。"
    with open(path, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    return path
def create_md_file(path):
    """Write a small UTF-8 Markdown file at *path* and return it."""
    markdown_body = "# 测试 MD 文件\n\n这是一份 Markdown 格式的测试文件。"
    with open(path, mode="w", encoding="utf-8") as handle:
        handle.write(markdown_body)
    return path
def create_json_file(path):
    """Write a small JSON document (2-space indent) at *path* and return it."""
    payload = {"message": "这是测试 JSON 文件", "value": 123}
    with open(path, mode="w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
    return path
def create_eml_file(path):
    """Write a minimal RFC-822-style e-mail file at *path* and return it."""
    headers = (
        "From: sender@example.com\n"
        "To: receiver@example.com\n"
        "Subject: 测试 EML 文件"
    )
    body = "这是一封测试邮件的内容。\n"
    with open(path, mode="w", encoding="utf-8") as handle:
        # Blank line separates headers from the message body.
        handle.write(headers + "\n\n" + body)
    return path
def create_html_file(path):
    """Write a minimal HTML page at *path* and return it."""
    lines = [
        "<html>",
        "<head><title>测试 HTML 文件</title></head>",
        "<body><h1>这是一个测试 HTML 文件</h1></body>",
        "</html>",
    ]
    with open(path, mode="w", encoding="utf-8") as handle:
        # No trailing newline after </html>, matching the original content.
        handle.write("\n".join(lines))
    return path

View File

@ -0,0 +1,101 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from pathlib import Path
import requests
from requests_toolbelt import MultipartEncoder
# Shared configuration for the HTTP-API tests.
HEADERS = {"Content-Type": "application/json"}
# Deployment under test; overridable via the HOST_ADDRESS env var.
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
DATASETS_API_URL = "/api/v1/datasets"
# URL template; callers substitute dataset_id via str.format().
FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"
INVALID_API_TOKEN = "invalid_key_123"
# Name-length limits the API enforces (per the error-path tests below).
DATASET_NAME_LIMIT = 128
DOCUMENT_NAME_LIMIT = 128
# DATASET MANAGEMENT
def create_dataset(auth, payload):
    """POST a dataset-creation request; return the decoded JSON body."""
    endpoint = f"{HOST_ADDRESS}{DATASETS_API_URL}"
    response = requests.post(endpoint, json=payload, headers=HEADERS, auth=auth)
    return response.json()
def list_dataset(auth, params=None):
    """GET the dataset collection, optionally filtered by query *params*."""
    endpoint = f"{HOST_ADDRESS}{DATASETS_API_URL}"
    response = requests.get(endpoint, params=params, headers=HEADERS, auth=auth)
    return response.json()
def update_dataset(auth, dataset_id, payload):
    """PUT an update for the dataset identified by *dataset_id*."""
    endpoint = f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}"
    response = requests.put(endpoint, json=payload, headers=HEADERS, auth=auth)
    return response.json()
def delete_dataset(auth, payload=None):
    """DELETE datasets; optional *payload* is forwarded as the JSON body."""
    endpoint = f"{HOST_ADDRESS}{DATASETS_API_URL}"
    response = requests.delete(endpoint, json=payload, headers=HEADERS, auth=auth)
    return response.json()
def create_datasets(auth, num):
    """Create *num* datasets named dataset_0..dataset_{num-1}; return their ids."""
    return [
        create_dataset(auth, {"name": f"dataset_{i}"})["data"]["id"]
        for i in range(num)
    ]
# FILE MANAGEMENT WITHIN DATASET
# NOTE: the name keeps the historical misspelling ("documnets") because the
# test modules import it under this exact name.
def upload_documnets(auth, dataset_id, files_path=None):
    """POST the files in *files_path* as multipart form-data to the
    document-upload endpoint of *dataset_id*.

    Args:
        auth: requests auth object (or None, to exercise the unauthenticated
            error path).
        dataset_id: target dataset id, substituted into FILE_API_URL.
        files_path: iterable of file paths; None or empty sends no file part,
            which lets tests exercise the API's "No file part!" response.

    Returns:
        The decoded JSON response body.
    """
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
    if files_path is None:
        files_path = []
    handles = []
    try:
        fields = []
        for fp in files_path:
            p = Path(fp)
            handle = p.open("rb")
            handles.append(handle)
            fields.append(("file", (p.name, handle)))
        m = MultipartEncoder(fields=fields)
        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=auth,
            data=m,
        )
        return res.json()
    finally:
        # The previous version leaked every opened file handle; close them
        # once the request has been sent (or construction failed).
        for handle in handles:
            handle.close()

View File

@ -0,0 +1,73 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import delete_dataset
from libs.utils.file_utils import (
create_docx_file,
create_eml_file,
create_excel_file,
create_html_file,
create_image_file,
create_json_file,
create_md_file,
create_pdf_file,
create_ppt_file,
create_txt_file,
)
@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    """Autouse teardown: after each test, call delete_dataset with no body
    (presumably removing all datasets for this account — verify against API)."""
    yield
    delete_dataset(get_http_api_auth)
@pytest.fixture
def generate_test_files(tmp_path):
    """Create one sample file of every supported type under *tmp_path*.

    Returns:
        dict mapping a short type key (e.g. "docx", "pdf") to the created
        file path inside tmp_path.
    """
    # (key, filename, creator) table keeps the fixture declarative.
    creators = [
        ("docx", "ragflow_test.docx", create_docx_file),
        ("excel", "ragflow_test.xlsx", create_excel_file),
        ("ppt", "ragflow_test.pptx", create_ppt_file),
        ("image", "ragflow_test.png", create_image_file),
        ("pdf", "ragflow_test.pdf", create_pdf_file),
        ("txt", "ragflow_test.txt", create_txt_file),
        ("md", "ragflow_test.md", create_md_file),
        ("json", "ragflow_test.json", create_json_file),
        ("eml", "ragflow_test.eml", create_eml_file),
        ("html", "ragflow_test.html", create_html_file),
    ]
    files = {}
    for key, filename, make in creators:
        files[key] = tmp_path / filename
        make(files[key])
    return files

View File

@ -1,57 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import requests
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
API_URL = f"{HOST_ADDRESS}/api/v1/datasets"
HEADERS = {"Content-Type": "application/json"}
INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128
def create_dataset(auth, payload):
    """POST a dataset-creation request; return the decoded JSON body."""
    return requests.post(API_URL, headers=HEADERS, auth=auth, json=payload).json()
def list_dataset(auth, params=None):
    """GET the dataset collection, optionally filtered by query *params*."""
    return requests.get(API_URL, headers=HEADERS, auth=auth, params=params).json()
def update_dataset(auth, dataset_id, payload):
    """PUT an update for the dataset identified by *dataset_id*."""
    endpoint = f"{API_URL}/{dataset_id}"
    return requests.put(endpoint, headers=HEADERS, auth=auth, json=payload).json()
def delete_dataset(auth, payload=None):
    """DELETE datasets; optional *payload* is forwarded as the JSON body."""
    return requests.delete(API_URL, headers=HEADERS, auth=auth, json=payload).json()
def create_datasets(auth, num):
    """Create *num* datasets named dataset_0..dataset_{num-1}; return their ids."""
    return [
        create_dataset(auth, {"name": f"dataset_{i}"})["data"]["id"]
        for i in range(num)
    ]

View File

@ -13,12 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from pathlib import Path
import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, create_dataset
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file
class TestAuthorization:
@ -75,18 +75,11 @@ class TestDatasetCreation:
class TestAdvancedConfigurations:
def test_avatar(self, get_http_api_auth, request):
def encode_avatar(image_path):
with Path.open(image_path, "rb") as file:
binary_data = file.read()
base64_encoded = base64.b64encode(binary_data).decode("utf-8")
return base64_encoded
def test_avatar(self, get_http_api_auth, tmp_path):
fn = create_image_file(tmp_path / "ragflow_test.png")
payload = {
"name": "avatar_test",
"avatar": encode_avatar(
Path(request.config.rootdir) / "test/data/logo.svg"
),
"avatar": encode_avatar(fn),
}
res = create_dataset(get_http_api_auth, payload)
assert res["code"] == 0

View File

@ -13,9 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import pytest
from common import (
@ -26,6 +24,8 @@ from common import (
update_dataset,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file
# TODO: Missing scenario for updating embedding_model with chunk_count != 0
@ -171,19 +171,10 @@ class TestDatasetUpdate:
else:
assert res["message"] == expected_message
def test_avatar(self, get_http_api_auth, request):
def encode_avatar(image_path):
with Path.open(image_path, "rb") as file:
binary_data = file.read()
base64_encoded = base64.b64encode(binary_data).decode("utf-8")
return base64_encoded
def test_avatar(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
payload = {
"avatar": encode_avatar(
Path(request.config.rootdir) / "test/data/logo.svg"
),
}
fn = create_image_file(tmp_path / "ragflow_test.png")
payload = {"avatar": encode_avatar(fn)}
res = update_dataset(get_http_api_auth, ids[0], payload)
assert res["code"] == 0

View File

@ -0,0 +1,230 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import string
from concurrent.futures import ThreadPoolExecutor
import pytest
import requests
from common import (
DOCUMENT_NAME_LIMIT,
FILE_API_URL,
HOST_ADDRESS,
INVALID_API_TOKEN,
create_datasets,
list_dataset,
upload_documnets,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils.file_utils import create_txt_file
from requests_toolbelt import MultipartEncoder
class TestAuthorization:
    """Authentication error paths of the document-upload endpoint."""

    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            # No Authorization header at all.
            (None, 0, "`Authorization` can't be empty"),
            (
                # Well-formed header carrying an unknown API key.
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(
        self, get_http_api_auth, auth, expected_code, expected_message
    ):
        """Uploading with bad credentials yields the documented error envelope."""
        # The dataset itself is created with VALID auth; only the upload
        # call below uses the parametrized (invalid) *auth*.
        ids = create_datasets(get_http_api_auth, 1)
        res = upload_documnets(auth, ids[0])
        assert res["code"] == expected_code
        assert res["message"] == expected_message
class TestUploadDocuments:
def test_valid_single_upload(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
fp = create_txt_file(tmp_path / "ragflow_test.txt")
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 0
assert res["data"][0]["dataset_id"] == ids[0]
assert res["data"][0]["name"] == fp.name
@pytest.mark.parametrize(
"file_type",
[
"docx",
"excel",
"ppt",
"image",
"pdf",
"txt",
"md",
"json",
"eml",
"html",
],
)
def test_file_type_validation(
self, get_http_api_auth, generate_test_files, file_type
):
ids = create_datasets(get_http_api_auth, 1)
fp = generate_test_files[file_type]
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 0
assert res["data"][0]["dataset_id"] == ids[0]
assert res["data"][0]["name"] == fp.name
@pytest.mark.parametrize(
"file_type",
["exe", "unknown"],
)
def test_unsupported_file_type(self, get_http_api_auth, tmp_path, file_type):
ids = create_datasets(get_http_api_auth, 1)
fp = tmp_path / f"ragflow_test.{file_type}"
fp.touch()
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 500
assert (
res["message"]
== f"ragflow_test.{file_type}: This type of file has not been supported yet!"
)
def test_missing_file(self, get_http_api_auth):
ids = create_datasets(get_http_api_auth, 1)
res = upload_documnets(get_http_api_auth, ids[0])
assert res["code"] == 101
assert res["message"] == "No file part!"
def test_empty_file(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
fp = tmp_path / "empty.txt"
fp.touch()
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 0
assert res["data"][0]["size"] == 0
def test_filename_empty(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
fp = create_txt_file(tmp_path / "ragflow_test.txt")
url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=ids[0])
fields = (("file", ("", fp.open("rb"))),)
m = MultipartEncoder(fields=fields)
res = requests.post(
url=url,
headers={"Content-Type": m.content_type},
auth=get_http_api_auth,
data=m,
)
assert res.json()["code"] == 101
assert res.json()["message"] == "No file selected!"
def test_filename_exceeds_max_length(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
# filename_length = 129
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 500
assert (
res["message"]
== f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt: Exceed the maximum length of file name!"
)
def test_invalid_dataset_id(self, get_http_api_auth, tmp_path):
fp = create_txt_file(tmp_path / "ragflow_test.txt")
res = upload_documnets(get_http_api_auth, "invalid_dataset_id", [fp])
assert res["code"] == 100
assert (
res["message"]
== """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")"""
)
def test_duplicate_files(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
fp = create_txt_file(tmp_path / "ragflow_test.txt")
res = upload_documnets(get_http_api_auth, ids[0], [fp, fp])
assert res["code"] == 0
assert len(res["data"]) == 2
for i in range(len(res["data"])):
assert res["data"][i]["dataset_id"] == ids[0]
expected_name = fp.name
if i != 0:
expected_name = f"{fp.stem}({i}){fp.suffix}"
assert res["data"][i]["name"] == expected_name
def test_same_file_repeat(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
fp = create_txt_file(tmp_path / "ragflow_test.txt")
for i in range(10):
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 0
assert len(res["data"]) == 1
assert res["data"][0]["dataset_id"] == ids[0]
expected_name = fp.name
if i != 0:
expected_name = f"{fp.stem}({i}){fp.suffix}"
assert res["data"][0]["name"] == expected_name
def test_filename_special_characters(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
illegal_chars = '<>:"/\\|?*'
translation_table = str.maketrans({char: "_" for char in illegal_chars})
safe_filename = string.punctuation.translate(translation_table)
fp = tmp_path / f"{safe_filename}.txt"
fp.write_text("Sample text content")
res = upload_documnets(get_http_api_auth, ids[0], [fp])
assert res["code"] == 0
assert len(res["data"]) == 1
assert res["data"][0]["dataset_id"] == ids[0]
assert res["data"][0]["name"] == fp.name
def test_multiple_files(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
expected_document_count = 20
fps = []
for i in range(expected_document_count):
fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
fps.append(fp)
res = upload_documnets(get_http_api_auth, ids[0], fps)
assert res["code"] == 0
res = list_dataset(get_http_api_auth, {"id": ids[0]})
assert res["data"][0]["document_count"] == expected_document_count
@pytest.mark.xfail
def test_concurrent_upload(self, get_http_api_auth, tmp_path):
ids = create_datasets(get_http_api_auth, 1)
expected_document_count = 20
fps = []
for i in range(expected_document_count):
fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
fps.append(fp)
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
executor.submit(
upload_documnets, get_http_api_auth, ids[0], fps[i : i + 1]
)
for i in range(expected_document_count)
]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
res = list_dataset(get_http_api_auth, {"id": ids[0]})
assert res["data"][0]["document_count"] == expected_document_count