mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
API: created list_doc (#1327)
### What problem does this PR solve? Adds the api of listing documentation. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -26,12 +26,11 @@ class RAGFlow:
|
||||
'''
|
||||
api_url: http://<host_address>/api/v1
|
||||
dataset_url: http://<host_address>/api/v1/dataset
|
||||
document_url: http://<host_address>/api/v1/documents
|
||||
document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
|
||||
'''
|
||||
self.user_key = user_key
|
||||
self.api_url = f"{base_url}/api/{version}"
|
||||
self.dataset_url = f"{self.api_url}/dataset"
|
||||
self.document_url = f"{self.api_url}/documents"
|
||||
self.authorization_header = {"Authorization": "{}".format(self.user_key)}
|
||||
|
||||
def create_dataset(self, dataset_name):
|
||||
@ -79,7 +78,7 @@ class RAGFlow:
|
||||
response = requests.put(endpoint, json=params, headers=self.authorization_header)
|
||||
return response.json()
|
||||
|
||||
# -------------------- content management -----------------------------------------------------
|
||||
# -------------------- content management -----------------------------------------------------
|
||||
|
||||
# ----------------------------upload local files-----------------------------------------------------
|
||||
def upload_local_file(self, dataset_id, file_paths):
|
||||
@ -95,7 +94,7 @@ class RAGFlow:
|
||||
else:
|
||||
return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"}
|
||||
|
||||
res = requests.request('POST', url=f"{self.document_url}/{dataset_id}", files=files,
|
||||
res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
|
||||
headers=self.authorization_header)
|
||||
|
||||
result_dict = json.loads(res.text)
|
||||
@ -103,16 +102,27 @@ class RAGFlow:
|
||||
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
def delete_files(self, document_id, dataset_id):
    """Delete a single document from a dataset.

    Documents are addressed under their owning dataset
    (``DELETE /dataset/{dataset_id}/documents/{document_id}``); the old
    flat ``/documents/...`` route is gone, so the stale endpoint
    assignment left over from the previous revision is removed here.

    :param document_id: id of the document to remove
    :param dataset_id: id of the dataset that owns the document
    :return: parsed JSON payload from the server
    """
    endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
    res = requests.delete(endpoint, headers=self.authorization_header)
    return res.json()
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
def list_files(self, dataset_id, offset=0, count=-1, order_by="create_time", descend=True, keywords=""):
    """List the documents stored in a dataset.

    Sends a GET request to the dataset's ``documents`` endpoint and
    returns the decoded JSON body.

    :param dataset_id: id of the dataset whose documents are listed
    :param offset: number of leading documents to skip
    :param count: maximum number of documents to return (-1 for all)
    :param order_by: field the result set is sorted by
    :param descend: sort in descending order when True
    :param keywords: keyword filter applied to document names
    :return: parsed JSON payload from the server
    """
    query = dict(
        offset=offset,
        count=count,
        order_by=order_by,
        descend=descend,
        keywords=keywords,
    )
    endpoint = f"{self.dataset_url}/{dataset_id}/documents/"
    response = requests.get(endpoint, params=query, headers=self.authorization_header)
    return response.json()
|
||||
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------enable rename-----------------------------------------------------
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
|
||||
# ----------------------------start parsing-----------------------------------------------------
|
||||
|
||||
# ----------------------------stop parsing-----------------------------------------------------
|
||||
|
||||
@ -37,7 +37,7 @@ class TestFile(TestSdk):
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt", "test_data/test1.txt"]
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
|
||||
assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
|
||||
|
||||
def test_upload_one_file(self):
|
||||
"""
|
||||
@ -48,7 +48,7 @@ class TestFile(TestSdk):
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt"]
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
|
||||
assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
|
||||
|
||||
def test_upload_nonexistent_files(self):
|
||||
"""
|
||||
@ -237,12 +237,143 @@ class TestFile(TestSdk):
|
||||
assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
|
||||
f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
def test_list_documents_with_success(self):
    """
    Test listing documents with a successful outcome.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Prepare a dataset that holds exactly one uploaded document.
    dataset_res = client.create_dataset("test_list_documents_with_success")
    dataset_id = dataset_res['data']['dataset_id']
    client.upload_local_file(dataset_id, ["test_data/test.txt"])
    # List the dataset's documents and verify the single upload shows up.
    listing = client.list_files(dataset_id)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 1
|
||||
|
||||
def test_list_documents_with_checking_size(self):
    """
    Test listing documents and verify the size and names of the documents.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Upload the same file ten times into a fresh dataset.
    dataset_res = client.create_dataset("test_list_documents_with_checking_size")
    dataset_id = dataset_res['data']['dataset_id']
    client.upload_local_file(dataset_id, ["test_data/test.txt"] * 10)
    # The listing must report all ten documents.
    listing = client.list_files(dataset_id)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 10
|
||||
|
||||
def test_list_documents_with_getting_empty_result(self):
    """
    Test listing documents that should be empty.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # A freshly created dataset contains no documents at all.
    dataset_res = client.create_dataset("test_list_documents_with_getting_empty_result")
    dataset_id = dataset_res['data']['dataset_id']
    # Listing it must succeed with an empty docs collection.
    listing = client.list_files(dataset_id)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 0
|
||||
|
||||
def test_list_documents_with_creating_100_documents(self):
    """
    Test listing 100 documents and verify the size of these documents.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Bulk-upload one hundred copies of the same file.
    dataset_res = client.create_dataset("test_list_documents_with_creating_100_documents")
    dataset_id = dataset_res['data']['dataset_id']
    client.upload_local_file(dataset_id, ["test_data/test.txt"] * 100)
    # Every uploaded copy must appear in the listing.
    listing = client.list_files(dataset_id)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 100
|
||||
|
||||
def test_list_document_with_failure(self):
    """
    Test listing documents with IndexError.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    dataset_res = client.create_dataset("test_list_document_with_failure")
    dataset_id = dataset_res['data']['dataset_id']
    # A negative offset is rejected server-side with an IndexError message.
    listing = client.list_files(dataset_id, offset=-1, count=-1)
    assert "IndexError" in listing['message'] and listing['code'] == RetCode.EXCEPTION_ERROR
|
||||
|
||||
def test_list_document_with_verifying_offset_and_count(self):
    """
    Test listing documents with verifying the functionalities of offset and count.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Twenty documents total: ten pairs of the two fixture files.
    dataset_res = client.create_dataset("test_list_document_with_verifying_offset_and_count")
    dataset_id = dataset_res['data']['dataset_id']
    client.upload_local_file(dataset_id, ["test_data/test.txt", "test_data/empty.txt"] * 10)
    # Skip the first two, then take a page of ten.
    listing = client.list_files(dataset_id, offset=2, count=10)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 10
|
||||
|
||||
def test_list_document_with_verifying_keywords(self):
    """
    Test listing documents with verifying the functionality of searching keywords.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Two documents, only one of whose names contains "empty".
    dataset_res = client.create_dataset("test_list_document_with_verifying_keywords")
    dataset_id = dataset_res['data']['dataset_id']
    client.upload_local_file(dataset_id, ["test_data/test.txt", "test_data/empty.txt"])
    # The keyword filter must narrow the listing down to that one match.
    listing = client.list_files(dataset_id, keywords="empty")
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 1
|
||||
|
||||
def test_list_document_with_verifying_order_by_and_descend(self):
    """
    Test listing documents with verifying the functionality of order_by and descend.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    dataset_res = client.create_dataset("test_list_document_with_verifying_order_by_and_descend")
    dataset_id = dataset_res['data']['dataset_id']
    file_paths = ["test_data/test.txt", "test_data/empty.txt"]
    client.upload_local_file(dataset_id, file_paths)
    # Default listing is descending, so documents come back newest-first,
    # i.e. in the reverse of the upload order.
    listing = client.list_files(dataset_id)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 2
    for doc, path in zip(listing['data']['docs'], reversed(file_paths)):
        assert doc['name'] in path
|
||||
|
||||
def test_list_document_with_verifying_order_by_and_ascend(self):
    """
    Test listing documents with verifying the functionality of order_by and ascend.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    dataset_res = client.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
    dataset_id = dataset_res['data']['dataset_id']
    file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
    client.upload_local_file(dataset_id, file_paths)
    # With descend disabled the documents come back oldest-first,
    # i.e. in the same order they were uploaded.
    listing = client.list_files(dataset_id, descend=False)
    assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 3
    for doc, path in zip(listing['data']['docs'], file_paths):
        assert doc['name'] in path
|
||||
|
||||
# TODO: have to set the limitation of the number of documents
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------enable rename-----------------------------------------------------
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
|
||||
# ----------------------------start parsing-----------------------------------------------------
|
||||
|
||||
# ----------------------------stop parsing-----------------------------------------------------
|
||||
@ -257,8 +388,6 @@ class TestFile(TestSdk):
|
||||
|
||||
# ----------------------------insert a new chunk-----------------------------------------------------
|
||||
|
||||
# ----------------------------upload a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------get a specific chunk-----------------------------------------------------
|
||||
|
||||
# ----------------------------retrieval test-----------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user