Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 20:42:30 +08:00
TEST: Added test cases for Upload Documents HTTP API (#5991)
### What problem does this PR solve?

Covers the Upload Documents HTTP API endpoints with test cases.

### Type of change

- [x] Add test cases
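For orientation, here is a minimal sketch of the call this suite exercises: a multipart POST to `/api/v1/datasets/{dataset_id}/documents`. The dataset ID and API key are placeholders, and the `Bearer` scheme is an assumption based on the suite's `RAGFlowHttpApiAuth` helper; the URL template matches `FILE_API_URL` in the test helpers below.

```python
# Hypothetical direct call to the endpoint under test (not part of the commit).
import requests

HOST_ADDRESS = "http://127.0.0.1:9380"  # default used by the tests
dataset_id = "<dataset_id>"             # placeholder
url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}/documents"

with open("ragflow_test.txt", "rb") as f:
    res = requests.post(
        url,
        headers={"Authorization": "Bearer <api_key>"},  # assumed auth scheme
        files={"file": ("ragflow_test.txt", f)},        # multipart "file" part
    )
# Expected shape on success: {"code": 0, "data": [{"dataset_id": ..., "name": ...}]}
print(res.json())
```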
@ -1,29 +0,0 @@
<svg width="32" height="34" viewBox="0 0 32 34" fill="none" xmlns="http://www.w3.org/2000/svg">
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M3.43265 20.7677C4.15835 21.5062 4.15834 22.7035 3.43262 23.4419L3.39546 23.4797C2.66974 24.2182 1.49312 24.2182 0.767417 23.4797C0.0417107 22.7412 0.0417219 21.544 0.767442 20.8055L0.804608 20.7677C1.53033 20.0292 2.70694 20.0293 3.43265 20.7677Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M12.1689 21.3375C12.8933 22.0773 12.8912 23.2746 12.1641 24.0117L7.01662 29.2307C6.2896 29.9678 5.11299 29.9657 4.38859 29.2259C3.66419 28.4861 3.66632 27.2888 4.39334 26.5517L9.54085 21.3327C10.2679 20.5956 11.4445 20.5977 12.1689 21.3375Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M19.1551 30.3217C19.7244 29.4528 20.8781 29.218 21.7321 29.7973L21.8436 29.8729C22.6975 30.4522 22.9283 31.6262 22.359 32.4952C21.7897 33.3641 20.6359 33.5989 19.782 33.0196L19.6705 32.944C18.8165 32.3647 18.5858 31.1907 19.1551 30.3217Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4184 20.6544C32.1441 21.3929 32.1441 22.5902 31.4184 23.3286L28.8911 25.9003C28.1654 26.6388 26.9887 26.6388 26.263 25.9003C25.5373 25.1619 25.5373 23.9646 26.263 23.2261L28.7903 20.6544C29.516 19.916 30.6927 19.916 31.4184 20.6544Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4557 11.1427C32.1814 11.8812 32.1814 13.0785 31.4557 13.8169L12.7797 32.8209C12.054 33.5594 10.8774 33.5594 10.1517 32.8209C9.42599 32.0825 9.42599 30.8852 10.1517 30.1467L28.8277 11.1427C29.5534 10.4043 30.73 10.4043 31.4557 11.1427Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M27.925 5.29994C28.6508 6.0384 28.6508 7.23568 27.925 7.97414L17.184 18.9038C16.4583 19.6423 15.2817 19.6423 14.556 18.9038C13.8303 18.1653 13.8303 16.9681 14.556 16.2296L25.297 5.29994C26.0227 4.56148 27.1993 4.56148 27.925 5.29994Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M22.256 1.59299C22.9822 2.33095 22.983 3.52823 22.2578 4.26718L8.45055 18.3358C7.72533 19.0748 6.54871 19.0756 5.82251 18.3376C5.09631 17.5996 5.09552 16.4024 5.82075 15.6634L19.6279 1.59478C20.3532 0.855827 21.5298 0.855022 22.256 1.59299Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M8.58225 6.09619C9.30671 6.83592 9.30469 8.0332 8.57772 8.77038L3.17006 14.2541C2.4431 14.9913 1.26649 14.9893 0.542025 14.2495C-0.182438 13.5098 -0.180413 12.3125 0.546548 11.5753L5.95421 6.09159C6.68117 5.3544 7.85778 5.35646 8.58225 6.09619Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M11.893 0.624023C12.9193 0.624023 13.7513 1.47063 13.7513 2.51497V2.70406C13.7513 3.7484 12.9193 4.59501 11.893 4.59501C10.8667 4.59501 10.0347 3.7484 10.0347 2.70406V2.51497C10.0347 1.47063 10.8667 0.624023 11.893 0.624023Z"
        fill="#B2DDFF" />
</svg>

Before Width: | Height: | Size: 3.0 KiB
@ -13,12 +13,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest
from common import delete_dataset


@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    yield
    delete_dataset(get_http_api_auth)
25  sdk/python/test/libs/utils/__init__.py  Normal file
@ -0,0 +1,25 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import base64
from pathlib import Path


def encode_avatar(image_path):
    with Path.open(image_path, "rb") as file:
        binary_data = file.read()
    base64_encoded = base64.b64encode(binary_data).decode("utf-8")
    return base64_encoded
107  sdk/python/test/libs/utils/file_utils.py  Normal file
@ -0,0 +1,107 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json

from docx import Document  # pip install python-docx
from openpyxl import Workbook  # pip install openpyxl
from PIL import Image, ImageDraw  # pip install Pillow
from pptx import Presentation  # pip install python-pptx
from reportlab.pdfgen import canvas  # pip install reportlab


def create_docx_file(path):
    doc = Document()
    doc.add_paragraph("This is a test DOCX file.")
    doc.save(path)
    return path


def create_excel_file(path):
    wb = Workbook()
    ws = wb.active
    ws["A1"] = "Test Excel file"
    wb.save(path)
    return path


def create_ppt_file(path):
    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = "Test PPT file"
    prs.save(path)
    return path


def create_image_file(path):
    img = Image.new("RGB", (100, 100), color="blue")
    draw = ImageDraw.Draw(img)
    draw.text((10, 40), "Test", fill="white")
    img.save(path)
    return path


def create_pdf_file(path):
    # reportlab's Canvas expects a string path, not a pathlib.Path
    if not isinstance(path, str):
        path = str(path)
    c = canvas.Canvas(path)
    c.drawString(100, 750, "Test PDF file")
    c.save()
    return path


def create_txt_file(path):
    with open(path, "w", encoding="utf-8") as f:
        f.write("This is the content of a test TXT file.")
    return path


def create_md_file(path):
    md_content = "# Test MD file\n\nThis is a test file in Markdown format."
    with open(path, "w", encoding="utf-8") as f:
        f.write(md_content)
    return path


def create_json_file(path):
    data = {"message": "This is a test JSON file", "value": 123}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    return path


def create_eml_file(path):
    eml_content = (
        "From: sender@example.com\n"
        "To: receiver@example.com\n"
        "Subject: Test EML file\n\n"
        "This is the content of a test email.\n"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(eml_content)
    return path


def create_html_file(path):
    html_content = (
        "<html>\n"
        "<head><title>Test HTML file</title></head>\n"
        "<body><h1>This is a test HTML file</h1></body>\n"
        "</html>"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_content)
    return path
101  sdk/python/test/test_http_api/common.py  Normal file
@ -0,0 +1,101 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
from pathlib import Path

import requests
from requests_toolbelt import MultipartEncoder

HEADERS = {"Content-Type": "application/json"}
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
DATASETS_API_URL = "/api/v1/datasets"
FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"

INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128
DOCUMENT_NAME_LIMIT = 128


# DATASET MANAGEMENT
def create_dataset(auth, payload):
    res = requests.post(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def list_dataset(auth, params=None):
    res = requests.get(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        params=params,
    )
    return res.json()


def update_dataset(auth, dataset_id, payload):
    res = requests.put(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def delete_dataset(auth, payload=None):
    res = requests.delete(
        url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
        headers=HEADERS,
        auth=auth,
        json=payload,
    )
    return res.json()


def create_datasets(auth, num):
    ids = []
    for i in range(num):
        res = create_dataset(auth, {"name": f"dataset_{i}"})
        ids.append(res["data"]["id"])
    return ids


# FILE MANAGEMENT WITHIN DATASET
def upload_documents(auth, dataset_id, files_path=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)

    if files_path is None:
        files_path = []

    # Build one multipart "file" part per path; MultipartEncoder streams
    # the bodies instead of loading every file into memory.
    fields = []
    for fp in files_path:
        p = Path(fp)
        fields.append(("file", (p.name, p.open("rb"))))
    m = MultipartEncoder(fields=fields)

    res = requests.post(
        url=url,
        headers={"Content-Type": m.content_type},
        auth=auth,
        data=m,
    )
    return res.json()
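Taken together, these helpers compose into the pattern the tests repeat. A minimal standalone sketch, assuming a reachable RAGFlow instance and a valid API key; the key is a placeholder, and in the suite the auth object is supplied by the `get_http_api_auth` fixture rather than constructed by hand:

```python
# Hypothetical standalone use of common.py's helpers (not part of the commit).
from common import create_datasets, delete_dataset, upload_documents
from libs.auth import RAGFlowHttpApiAuth

auth = RAGFlowHttpApiAuth("<api_key>")                # placeholder key
ids = create_datasets(auth, 1)                        # one fresh dataset
res = upload_documents(auth, ids[0], ["sample.txt"])  # sample.txt: any local file
assert res["code"] == 0
delete_dataset(auth)                                  # cleanup, mirroring clear_datasets
```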
73  sdk/python/test/test_http_api/conftest.py  Normal file
@ -0,0 +1,73 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import delete_dataset
from libs.utils.file_utils import (
    create_docx_file,
    create_eml_file,
    create_excel_file,
    create_html_file,
    create_image_file,
    create_json_file,
    create_md_file,
    create_pdf_file,
    create_ppt_file,
    create_txt_file,
)


@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    yield
    delete_dataset(get_http_api_auth)


@pytest.fixture
def generate_test_files(tmp_path):
    files = {}
    files["docx"] = tmp_path / "ragflow_test.docx"
    create_docx_file(files["docx"])

    files["excel"] = tmp_path / "ragflow_test.xlsx"
    create_excel_file(files["excel"])

    files["ppt"] = tmp_path / "ragflow_test.pptx"
    create_ppt_file(files["ppt"])

    files["image"] = tmp_path / "ragflow_test.png"
    create_image_file(files["image"])

    files["pdf"] = tmp_path / "ragflow_test.pdf"
    create_pdf_file(files["pdf"])

    files["txt"] = tmp_path / "ragflow_test.txt"
    create_txt_file(files["txt"])

    files["md"] = tmp_path / "ragflow_test.md"
    create_md_file(files["md"])

    files["json"] = tmp_path / "ragflow_test.json"
    create_json_file(files["json"])

    files["eml"] = tmp_path / "ragflow_test.eml"
    create_eml_file(files["eml"])

    files["html"] = tmp_path / "ragflow_test.html"
    create_html_file(files["html"])

    return files
@ -1,57 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import requests

HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
API_URL = f"{HOST_ADDRESS}/api/v1/datasets"
HEADERS = {"Content-Type": "application/json"}

INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128


def create_dataset(auth, payload):
    res = requests.post(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_dataset(auth, params=None):
    res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_dataset(auth, dataset_id, payload):
    res = requests.put(
        url=f"{API_URL}/{dataset_id}", headers=HEADERS, auth=auth, json=payload
    )
    return res.json()


def delete_dataset(auth, payload=None):
    res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def create_datasets(auth, num):
    ids = []
    for i in range(num):
        res = create_dataset(auth, {"name": f"dataset_{i}"})
        ids.append(res["data"]["id"])
    return ids
@ -13,12 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from pathlib import Path

import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, create_dataset
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file


class TestAuthorization:
@ -75,18 +75,11 @@ class TestDatasetCreation:


class TestAdvancedConfigurations:
    def test_avatar(self, get_http_api_auth, request):
        def encode_avatar(image_path):
            with Path.open(image_path, "rb") as file:
                binary_data = file.read()
            base64_encoded = base64.b64encode(binary_data).decode("utf-8")
            return base64_encoded

    def test_avatar(self, get_http_api_auth, tmp_path):
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {
            "name": "avatar_test",
            "avatar": encode_avatar(
                Path(request.config.rootdir) / "test/data/logo.svg"
            ),
            "avatar": encode_avatar(fn),
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0

@ -13,9 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import pytest
from common import (
@ -26,6 +24,8 @@ from common import (
    update_dataset,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file

# TODO: Missing scenario for updating embedding_model with chunk_count != 0

@ -171,19 +171,10 @@ class TestDatasetUpdate:
        else:
            assert res["message"] == expected_message

    def test_avatar(self, get_http_api_auth, request):
        def encode_avatar(image_path):
            with Path.open(image_path, "rb") as file:
                binary_data = file.read()
            base64_encoded = base64.b64encode(binary_data).decode("utf-8")
            return base64_encoded

    def test_avatar(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        payload = {
            "avatar": encode_avatar(
                Path(request.config.rootdir) / "test/data/logo.svg"
            ),
        }
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {"avatar": encode_avatar(fn)}
        res = update_dataset(get_http_api_auth, ids[0], payload)
        assert res["code"] == 0

@ -0,0 +1,230 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import string
from concurrent.futures import ThreadPoolExecutor

import pytest
import requests
from common import (
    DOCUMENT_NAME_LIMIT,
    FILE_API_URL,
    HOST_ADDRESS,
    INVALID_API_TOKEN,
    create_datasets,
    list_dataset,
    upload_documents,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils.file_utils import create_txt_file
from requests_toolbelt import MultipartEncoder


class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(
        self, get_http_api_auth, auth, expected_code, expected_message
    ):
        ids = create_datasets(get_http_api_auth, 1)
        res = upload_documents(auth, ids[0])
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestUploadDocuments:
    def test_valid_single_upload(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.parametrize(
        "file_type",
        [
            "docx",
            "excel",
            "ppt",
            "image",
            "pdf",
            "txt",
            "md",
            "json",
            "eml",
            "html",
        ],
    )
    def test_file_type_validation(
        self, get_http_api_auth, generate_test_files, file_type
    ):
        ids = create_datasets(get_http_api_auth, 1)
        fp = generate_test_files[file_type]
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.parametrize(
        "file_type",
        ["exe", "unknown"],
    )
    def test_unsupported_file_type(self, get_http_api_auth, tmp_path, file_type):
        ids = create_datasets(get_http_api_auth, 1)
        fp = tmp_path / f"ragflow_test.{file_type}"
        fp.touch()
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 500
        assert (
            res["message"]
            == f"ragflow_test.{file_type}: This type of file has not been supported yet!"
        )

    def test_missing_file(self, get_http_api_auth):
        ids = create_datasets(get_http_api_auth, 1)
        res = upload_documents(get_http_api_auth, ids[0])
        assert res["code"] == 101
        assert res["message"] == "No file part!"

    def test_empty_file(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = tmp_path / "empty.txt"
        fp.touch()

        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert res["data"][0]["size"] == 0

    def test_filename_empty(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=ids[0])
        # Bypass the helper to send a part whose filename is empty
        fields = (("file", ("", fp.open("rb"))),)
        m = MultipartEncoder(fields=fields)
        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=get_http_api_auth,
            data=m,
        )
        assert res.json()["code"] == 101
        assert res.json()["message"] == "No file selected!"

    def test_filename_exceeds_max_length(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        # 125 "a"s plus ".txt" -> 129 characters, one over DOCUMENT_NAME_LIMIT (128)
        fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 500
        assert (
            res["message"]
            == f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt: Exceed the maximum length of file name!"
        )

    def test_invalid_dataset_id(self, get_http_api_auth, tmp_path):
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, "invalid_dataset_id", [fp])
        assert res["code"] == 100
        assert (
            res["message"]
            == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")"""
        )

    def test_duplicate_files(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documents(get_http_api_auth, ids[0], [fp, fp])
        assert res["code"] == 0
        assert len(res["data"]) == 2
        for i in range(len(res["data"])):
            assert res["data"][i]["dataset_id"] == ids[0]
            # Duplicates are deduplicated by suffixing "(1)", "(2)", ...
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][i]["name"] == expected_name

    def test_same_file_repeat(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        for i in range(10):
            res = upload_documents(get_http_api_auth, ids[0], [fp])
            assert res["code"] == 0
            assert len(res["data"]) == 1
            assert res["data"][0]["dataset_id"] == ids[0]
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][0]["name"] == expected_name

    def test_filename_special_characters(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        # Replace filesystem-illegal characters so the local file can be created
        illegal_chars = '<>:"/\\|?*'
        translation_table = str.maketrans({char: "_" for char in illegal_chars})
        safe_filename = string.punctuation.translate(translation_table)
        fp = tmp_path / f"{safe_filename}.txt"
        fp.write_text("Sample text content")

        res = upload_documents(get_http_api_auth, ids[0], [fp])
        assert res["code"] == 0
        assert len(res["data"]) == 1
        assert res["data"][0]["dataset_id"] == ids[0]
        assert res["data"][0]["name"] == fp.name

    def test_multiple_files(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)
        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)
        res = upload_documents(get_http_api_auth, ids[0], fps)
        assert res["code"] == 0

        res = list_dataset(get_http_api_auth, {"id": ids[0]})
        assert res["data"][0]["document_count"] == expected_document_count

    @pytest.mark.xfail
    def test_concurrent_upload(self, get_http_api_auth, tmp_path):
        ids = create_datasets(get_http_api_auth, 1)

        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    upload_documents, get_http_api_auth, ids[0], fps[i : i + 1]
                )
                for i in range(expected_document_count)
            ]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

        res = list_dataset(get_http_api_auth, {"id": ids[0]})
        assert res["data"][0]["document_count"] == expected_document_count
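As a closing note, a hedged sketch of running just this suite; the `pytest.main` call is equivalent to invoking `pytest` on `sdk/python/test/test_http_api` with `HOST_ADDRESS` pointing at a running RAGFlow instance, and the `-k upload` filter relies on the upload tests' names, which is an assumption about how the suite is selected in practice:

```python
# Hypothetical runner for this suite (not part of the commit).
import os
import pytest

# Point the helpers at a running RAGFlow instance; default mirrors common.py.
os.environ.setdefault("HOST_ADDRESS", "http://127.0.0.1:9380")

# Select the upload tests by name; equivalent to `pytest -k upload`.
raise SystemExit(pytest.main(["sdk/python/test/test_http_api", "-k", "upload"]))
```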