mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add test for document (#3548)
### What problem does this PR solve? Add test for document ### Type of change - [x] New Feature (non-breaking change which adds functionality) Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
@ -1,6 +1,6 @@
|
||||
from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
|
||||
from ragflow_sdk import RAGFlow
|
||||
from common import HOST_ADDRESS
|
||||
|
||||
import pytest
|
||||
|
||||
def test_upload_document_with_success(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
@ -48,7 +48,6 @@ def test_list_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
ds.list_documents(keywords="test", page=1, page_size=12)
|
||||
|
||||
|
||||
|
||||
def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
@ -59,4 +58,109 @@ def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
docs = ds.upload_documents(document_infos)
|
||||
ds.delete_documents([docs[0].id])
|
||||
|
||||
# Upload and parse documents with different parse methods.
|
||||
def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.pdf`` and submit it for asynchronous parsing.

    Creates the dataset ``test_pdf_document``, uploads the raw file bytes as
    ``test.pdf``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_pdf_document")
    with open("test_data/test.pdf", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.pdf", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
|
||||
def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.docx`` and submit it for asynchronous parsing.

    Creates the dataset ``test_docx_document``, uploads the raw file bytes as
    ``test.docx``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_docx_document")
    with open("test_data/test.docx", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.docx", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.xlsx`` and submit it for asynchronous parsing.

    Creates the dataset ``test_excel_document``, uploads the raw file bytes
    as ``test.xlsx``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_excel_document")
    with open("test_data/test.xlsx", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.xlsx", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.ppt`` and submit it for asynchronous parsing.

    Creates the dataset ``test_ppt_document``, uploads the raw file bytes as
    ``test.ppt``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_ppt_document")
    with open("test_data/test.ppt", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.ppt", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.jpg`` and submit it for asynchronous parsing.

    Creates the dataset ``test_image_document``, uploads the raw file bytes
    as ``test.jpg``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_image_document")
    with open("test_data/test.jpg", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.jpg", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.txt`` and submit it for asynchronous parsing.

    Creates the dataset ``test_txt_document``, uploads the raw file bytes as
    ``test.txt``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_txt_document")
    with open("test_data/test.txt", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.txt", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.md`` and submit it for asynchronous parsing.

    Creates the dataset ``test_md_document``, uploads the raw file bytes as
    ``test.md``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_md_document")
    with open("test_data/test.md", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.md", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
|
||||
def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.json`` and submit it for asynchronous parsing.

    Creates the dataset ``test_json_document``, uploads the raw file bytes as
    ``test.json``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_json_document")
    with open("test_data/test.json", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.json", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
|
||||
# NOTE(review): this test is skipped with an empty reason; record why once known.
@pytest.mark.skip(reason="")
def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.eml`` and submit it for asynchronous parsing.

    Creates the dataset ``test_eml_document``, uploads the raw file bytes as
    ``test.eml``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_eml_document")
    with open("test_data/test.eml", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.eml", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
|
||||
def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
    """Upload ``test_data/test.html`` and submit it for asynchronous parsing.

    Creates the dataset ``test_html_document``, uploads the raw file bytes as
    ``test.html``, then calls ``async_parse_documents`` with the id of the
    first uploaded document. No result is asserted; the test passes when
    none of the SDK calls raise.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_html_document")
    with open("test_data/test.html", "rb") as handle:
        payload = handle.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.html", "blob": payload}]
    )
    first_doc = uploaded[0]
    dataset.async_parse_documents([first_doc.id])
|
||||
Reference in New Issue
Block a user