Update comments (#4569)

### What problem does this PR solve?

Add license statements.

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Committed by Jin Hai on 2025-01-21 20:52:28 +08:00, via GitHub
parent 583050a876
commit 3894de895b
86 changed files with 1034 additions and 145 deletions

@@ -1,13 +1,30 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 from ragflow_sdk import RAGFlow
 from common import HOST_ADDRESS
 from time import sleep
 
 def test_parse_document_with_txt(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
     ds = rag.create_dataset(name="test_parse_document")
     name = 'ragflow_test.txt'
-    with open("test_data/ragflow_test.txt", "rb") as file :
+    with open("test_data/ragflow_test.txt", "rb") as file:
         blob = file.read()
     docs = ds.upload_documents([{"display_name": name, "blob": blob}])
     doc = docs[0]
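The lines elided between this hunk and the next poll for parsing progress before raising the timeout error shown below. A minimal sketch of that polling pattern, assuming the SDK exposes `DataSet.list_documents(id=...)` and a `progress` attribute on the returned documents (both are assumptions here, not taken from this diff):

```python
from time import sleep

# Hypothetical helper; list_documents(id=...) and doc.progress are
# assumed SDK surface, not confirmed by this diff.
def wait_for_parsing(ds, doc_id, timeout_s=60):
    for _ in range(timeout_s):
        doc = ds.list_documents(id=doc_id)[0]
        if doc.progress == 1.0:  # parsing reported complete
            return
        sleep(1)
    raise Exception("Run time ERROR: Document parsing did not complete in time.")
```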
@@ -21,14 +38,15 @@ def test_parse_document_with_txt(get_api_key_fixture):
         raise Exception("Run time ERROR: Document parsing did not complete in time.")
 
 '''
 def test_parse_and_cancel_document(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
     ds = rag.create_dataset(name="test_parse_and_cancel_document")
     name = 'ragflow_test.txt'
-    with open("test_data/ragflow_test.txt", "rb") as file :
+    with open("test_data/ragflow_test.txt", "rb") as file:
         blob = file.read()
-    docs=ds.upload_documents([{"display_name": name, "blob": blob}])
+    docs = ds.upload_documents([{"display_name": name, "blob": blob}])
     doc = docs[0]
     ds.async_parse_documents(document_ids=[doc.id])
     sleep(1)
@@ -60,6 +78,7 @@ def test_bulk_parse_documents(get_api_key_fixture):
         raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
 '''
 
 def test_list_chunks_with_success(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
@@ -73,7 +92,7 @@ def test_list_chunks_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_list_chunks_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     ids = [doc.id for doc in docs]
     ds.async_parse_documents(ids)
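After parsing, this test presumably lists the chunks it just created. A hedged sketch of that follow-up, assuming `Document.list_chunks(page=..., page_size=...)` and `RAGFlow.list_datasets(name=...)` have roughly this shape (neither signature is confirmed by this diff):

```python
from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS

# Hedged sketch; list_chunks and its pagination kwargs are assumptions.
def list_parsed_chunks(api_key, dataset_name="test_list_chunks_with_success"):
    rag = RAGFlow(api_key, HOST_ADDRESS)
    ds = rag.list_datasets(name=dataset_name)[0]
    doc = ds.list_documents()[0]
    return doc.list_chunks(page=1, page_size=30)
```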
@@ -103,7 +122,7 @@ def test_add_chunk_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_list_chunks_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     doc.add_chunk(content="This is a chunk addition test")
@@ -122,7 +141,7 @@ def test_delete_chunk_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_delete_chunk_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_delete_chunk_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
@@ -143,13 +162,14 @@ def test_update_chunk_content(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_update_chunk_content_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_update_chunk_content_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
     # For Elasticsearch, the chunk is not searchable in a short time (~2s).
     sleep(3)
-    chunk.update({"content":"This is a updated content"})
+    chunk.update({"content": "This is a updated content"})
 
 
 def test_update_chunk_available(get_api_key_fixture):
     API_KEY = get_api_key_fixture
@@ -164,13 +184,13 @@ def test_update_chunk_available(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_update_chunk_available_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_update_chunk_available_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
     # For Elasticsearch, the chunk is not searchable in a short time (~2s).
     sleep(3)
-    chunk.update({"available":0})
+    chunk.update({"available": 0})
 
 
 def test_retrieve_chunks(get_api_key_fixture):
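The fixed `sleep(3)` above works around Elasticsearch's near-real-time indexing, where newly written documents only become searchable after a segment refresh (once per second by default). A hedged alternative is to poll until the chunk is retrievable; the `retrieve` keyword arguments below are assumptions about the SDK surface, not taken from this diff:

```python
from time import sleep

# Poll instead of a fixed sleep; hypothetical helper, kwargs assumed.
def wait_until_searchable(rag, dataset_id, document_id, text, attempts=10):
    for _ in range(attempts):
        if rag.retrieve(dataset_ids=[dataset_id], document_ids=[document_id],
                        question=text):
            return True
        sleep(1)  # Elasticsearch's default refresh interval is 1s
    return False
```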
@@ -186,11 +206,11 @@ def test_retrieve_chunks(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_retrieve_chunks.txt","blob":blob}]
+    documents = [{"display_name": "test_retrieve_chunks.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     doc.add_chunk(content="This is a chunk addition test")
-    rag.retrieve(dataset_ids=[ds.id],document_ids=[doc.id])
+    rag.retrieve(dataset_ids=[ds.id], document_ids=[doc.id])
     rag.delete_datasets(ids=[ds.id])
 
 # test different parameters for the retrieval
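The trailing comment anticipates exercising different retrieval parameters. A minimal sketch of what that could look like; every keyword argument besides `dataset_ids` and `document_ids` is an assumption about the SDK, not taken from this diff:

```python
from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS

# Hedged sketch of parameterized retrieval; kwargs below are assumptions.
def retrieve_with_params(api_key, dataset_id, document_id):
    rag = RAGFlow(api_key, HOST_ADDRESS)
    return rag.retrieve(
        dataset_ids=[dataset_id],
        document_ids=[document_id],
        question="chunk addition",     # query text
        page=1,
        page_size=10,                  # pagination of returned chunks
        similarity_threshold=0.2,      # minimum hybrid similarity score
        vector_similarity_weight=0.3,  # dense-vs-keyword mix
        top_k=1024,                    # candidates considered in vector search
    )
```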