Update comments (#4569)

### What problem does this PR solve?

Add license statements.

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Committed by Jin Hai on 2025-01-21 20:52:28 +08:00, via GitHub
parent 583050a876
commit 3894de895b
86 changed files with 1034 additions and 145 deletions

@@ -1,13 +1,30 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 from ragflow_sdk import RAGFlow
 from common import HOST_ADDRESS
 from time import sleep
 
 def test_parse_document_with_txt(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
     ds = rag.create_dataset(name="test_parse_document")
     name = 'ragflow_test.txt'
-    with open("test_data/ragflow_test.txt", "rb") as file :
+    with open("test_data/ragflow_test.txt", "rb") as file:
         blob = file.read()
     docs = ds.upload_documents([{"display_name": name, "blob": blob}])
     doc = docs[0]
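The lines elided between this hunk and the next poll for parsing progress before raising the timeout error shown below. A minimal sketch of that polling pattern, assuming the SDK exposes `DataSet.list_documents(id=...)` and a `progress` attribute on the returned documents (both are assumptions here, not taken from this diff):

```python
from time import sleep

# Hypothetical helper; list_documents(id=...) and doc.progress are
# assumed SDK surface, not confirmed by this diff.
def wait_for_parsing(ds, doc_id, timeout_s=60):
    for _ in range(timeout_s):
        doc = ds.list_documents(id=doc_id)[0]
        if doc.progress == 1.0:  # parsing reported complete
            return
        sleep(1)
    raise Exception("Run time ERROR: Document parsing did not complete in time.")
```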
@@ -21,14 +38,15 @@ def test_parse_document_with_txt(get_api_key_fixture):
         raise Exception("Run time ERROR: Document parsing did not complete in time.")
 
 '''
 def test_parse_and_cancel_document(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
     ds = rag.create_dataset(name="test_parse_and_cancel_document")
     name = 'ragflow_test.txt'
-    with open("test_data/ragflow_test.txt", "rb") as file :
+    with open("test_data/ragflow_test.txt", "rb") as file:
         blob = file.read()
-    docs=ds.upload_documents([{"display_name": name, "blob": blob}])
+    docs = ds.upload_documents([{"display_name": name, "blob": blob}])
     doc = docs[0]
     ds.async_parse_documents(document_ids=[doc.id])
     sleep(1)
@@ -60,6 +78,7 @@ def test_bulk_parse_documents(get_api_key_fixture):
         raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
 '''
 
 def test_list_chunks_with_success(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
@@ -73,7 +92,7 @@ def test_list_chunks_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_list_chunks_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     ids = [doc.id for doc in docs]
     ds.async_parse_documents(ids)
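After parsing, this test presumably lists the chunks it just created. A hedged sketch of that follow-up, assuming `Document.list_chunks(page=..., page_size=...)` and `RAGFlow.list_datasets(name=...)` have roughly this shape (neither signature is confirmed by this diff):

```python
from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS

# Hedged sketch; list_chunks and its pagination kwargs are assumptions.
def list_parsed_chunks(api_key, dataset_name="test_list_chunks_with_success"):
    rag = RAGFlow(api_key, HOST_ADDRESS)
    ds = rag.list_datasets(name=dataset_name)[0]
    doc = ds.list_documents()[0]
    return doc.list_chunks(page=1, page_size=30)
```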
@@ -103,7 +122,7 @@ def test_add_chunk_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_list_chunks_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     doc.add_chunk(content="This is a chunk addition test")
@@ -122,7 +141,7 @@ def test_delete_chunk_with_success(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_delete_chunk_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_delete_chunk_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
@@ -143,13 +162,14 @@ def test_update_chunk_content(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_update_chunk_content_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_update_chunk_content_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
     # For Elasticsearch, the chunk is not searchable in a short time (~2s).
     sleep(3)
-    chunk.update({"content":"This is a updated content"})
+    chunk.update({"content": "This is a updated content"})
 
 
 def test_update_chunk_available(get_api_key_fixture):
     API_KEY = get_api_key_fixture
@@ -164,13 +184,13 @@ def test_update_chunk_available(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_update_chunk_available_with_success.txt","blob":blob}]
+    documents = [{"display_name": "test_update_chunk_available_with_success.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
     # For Elasticsearch, the chunk is not searchable in a short time (~2s).
     sleep(3)
-    chunk.update({"available":0})
+    chunk.update({"available": 0})
 
 
 def test_retrieve_chunks(get_api_key_fixture):
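The fixed `sleep(3)` above works around Elasticsearch's near-real-time indexing, where newly written documents only become searchable after a segment refresh (once per second by default). A hedged alternative is to poll until the chunk is retrievable; the `retrieve` keyword arguments below are assumptions about the SDK surface, not taken from this diff:

```python
from time import sleep

# Poll instead of a fixed sleep; hypothetical helper, kwargs assumed.
def wait_until_searchable(rag, dataset_id, document_id, text, attempts=10):
    for _ in range(attempts):
        if rag.retrieve(dataset_ids=[dataset_id], document_ids=[document_id],
                        question=text):
            return True
        sleep(1)  # Elasticsearch's default refresh interval is 1s
    return False
```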
@@ -186,11 +206,11 @@ def test_retrieve_chunks(get_api_key_fixture):
         {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
     ]
     '''
-    documents =[{"display_name":"test_retrieve_chunks.txt","blob":blob}]
+    documents = [{"display_name": "test_retrieve_chunks.txt", "blob": blob}]
     docs = ds.upload_documents(documents)
     doc = docs[0]
     doc.add_chunk(content="This is a chunk addition test")
-    rag.retrieve(dataset_ids=[ds.id],document_ids=[doc.id])
+    rag.retrieve(dataset_ids=[ds.id], document_ids=[doc.id])
     rag.delete_datasets(ids=[ds.id])
 
 # test different parameters for the retrieval
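The trailing comment anticipates exercising different retrieval parameters. A minimal sketch of what that could look like; every keyword argument besides `dataset_ids` and `document_ids` is an assumption about the SDK, not taken from this diff:

```python
from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS

# Hedged sketch of parameterized retrieval; kwargs below are assumptions.
def retrieve_with_params(api_key, dataset_id, document_id):
    rag = RAGFlow(api_key, HOST_ADDRESS)
    return rag.retrieve(
        dataset_ids=[dataset_id],
        document_ids=[document_id],
        question="chunk addition",     # query text
        page=1,
        page_size=10,                  # pagination of returned chunks
        similarity_threshold=0.2,      # minimum hybrid similarity score
        vector_similarity_weight=0.3,  # dense-vs-keyword mix
        top_k=1024,                    # candidates considered in vector search
    )
```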