Test: Refactor test concurrency handling and add SDK chunk management tests (#8112)
### What problem does this PR solve?

- Improve concurrent test cases by using `as_completed` for better reliability
- Rename variables for clarity (`chunk_num` -> `count`)
- Add new SDK API test suite for chunk management operations
- Update HTTP API tests with consistent concurrency patterns

### Type of change

- [x] Add test cases
- [x] Refactoring
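The change applied across every touched test follows one pattern. Below is a minimal, self-contained sketch of the before and after, where `do_request` is a hypothetical stand-in for the real API wrappers (`add_chunk`, `delete_chunks`, `list_chunks`, and so on):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def do_request(i):
    # Hypothetical stand-in for an HTTP/SDK API call under test.
    return {"code": 0, "index": i}

count = 50
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(do_request, i) for i in range(count)]

# Before: gather results in submission order and check payloads only.
# responses = [f.result() for f in futures]
# assert all(r["code"] == 0 for r in responses)

# After: drain futures in completion order, assert that the expected
# number of tasks actually completed, then check every payload.
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures)
```

The explicit `count` check matters because `all()` over an empty or truncated sequence is trivially true; asserting the completed-task count first makes that failure mode visible.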
Chunk addition tests (`TestAddChunk`):

```diff
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import pytest
 from common import INVALID_API_TOKEN, add_chunk, delete_documents, list_chunks
@@ -224,7 +224,7 @@ class TestAddChunk:
 
     @pytest.mark.skip(reason="issues/6411")
     def test_concurrent_add_chunk(self, api_key, add_document):
-        chunk_num = 50
+        count = 50
         dataset_id, document_id = add_document
         res = list_chunks(api_key, dataset_id, document_id)
         if res["code"] != 0:
@@ -240,11 +240,12 @@ class TestAddChunk:
                     document_id,
                     {"content": f"chunk test {i}"},
                 )
-                for i in range(chunk_num)
+                for i in range(count)
             ]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
         res = list_chunks(api_key, dataset_id, document_id)
         if res["code"] != 0:
             assert False, res
-        assert res["data"]["doc"]["chunk_count"] == chunks_count + chunk_num
+        assert res["data"]["doc"]["chunk_count"] == chunks_count + count
```
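A note on the new pattern: called without a `timeout`, `as_completed` still blocks until every future has finished, so `list(as_completed(futures))` drains the pool completely; the gain is the count assertion described above. If fail-fast behavior were ever wanted, the same call accepts a timeout (a hypothetical tightening, not part of this PR):

```python
# Hypothetical variant: raises TimeoutError if any request is still
# pending 60 seconds after the as_completed() call.
responses = list(as_completed(futures, timeout=60))
```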
Chunk deletion tests (`TestChunksDeletion`):

```diff
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import pytest
 from common import INVALID_API_TOKEN, batch_add_chunks, delete_chunks, list_chunks
@@ -121,9 +121,9 @@ class TestChunksDeletion:
 
     @pytest.mark.p3
     def test_concurrent_deletion(self, api_key, add_document):
-        chunks_num = 100
+        count = 100
         dataset_id, document_id = add_document
-        chunk_ids = batch_add_chunks(api_key, dataset_id, document_id, chunks_num)
+        chunk_ids = batch_add_chunks(api_key, dataset_id, document_id, count)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
             futures = [
@@ -134,10 +134,11 @@ class TestChunksDeletion:
                     document_id,
                     {"chunk_ids": chunk_ids[i : i + 1]},
                 )
-                for i in range(chunks_num)
+                for i in range(count)
             ]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
 
     @pytest.mark.p3
     def test_delete_1k(self, api_key, add_document):
```
Chunk listing tests (`TestChunksList`):

```diff
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 import os
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import pytest
 from common import INVALID_API_TOKEN, batch_add_chunks, list_chunks
@@ -149,12 +149,12 @@ class TestChunksList:
     @pytest.mark.p3
     def test_concurrent_list(self, api_key, add_chunks):
         dataset_id, document_id, _ = add_chunks
-
+        count = 100
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(list_chunks, api_key, dataset_id, document_id) for i in range(100)]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
-        assert all(len(r["data"]["chunks"]) == 5 for r in responses)
+            futures = [executor.submit(list_chunks, api_key, dataset_id, document_id) for i in range(count)]
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(len(future.result()["data"]["chunks"]) == 5 for future in futures)
 
     @pytest.mark.p1
     def test_default(self, api_key, add_document):
```
Chunk retrieval tests (`TestChunksRetrieval`):

```diff
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import pytest
 from common import (
@@ -302,12 +303,12 @@ class TestChunksRetrieval:
 
     @pytest.mark.p3
     def test_concurrent_retrieval(self, api_key, add_chunks):
-        from concurrent.futures import ThreadPoolExecutor
-
         dataset_id, _, _ = add_chunks
+        count = 100
         payload = {"question": "chunk", "dataset_ids": [dataset_id]}
 
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(retrieval_chunks, api_key, payload) for i in range(100)]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+            futures = [executor.submit(retrieval_chunks, api_key, payload) for i in range(count)]
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
```
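A design note on the retrieval hunk: the test previously imported `ThreadPoolExecutor` inside the function body. The refactor deletes that local import and relies on the module-level `from concurrent.futures import ThreadPoolExecutor, as_completed` added at the top of the file, so every test in the module shares one import.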
Chunk update tests (`TestUpdatedChunk`):

```diff
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 import os
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from random import randint
 
 import pytest
@@ -219,7 +219,7 @@ class TestUpdatedChunk:
     @pytest.mark.p3
     @pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6554")
     def test_concurrent_update_chunk(self, api_key, add_chunks):
-        chunk_num = 50
+        count = 50
         dataset_id, document_id, chunk_ids = add_chunks
 
         with ThreadPoolExecutor(max_workers=5) as executor:
@@ -232,10 +232,11 @@ class TestUpdatedChunk:
                     chunk_ids[randint(0, 3)],
                     {"content": f"update chunk test {i}"},
                 )
-                for i in range(chunk_num)
+                for i in range(count)
             ]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
 
     @pytest.mark.p3
     def test_update_chunk_to_deleted_document(self, api_key, add_chunks):
```
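The remaining hunks, in the dataset and document test suites, all fix the same naming hazard: the old assertions reused `futures` as the generator variable, shadowing the list being iterated. Because Python evaluates the outer iterable of a generator expression before binding the loop variable, the old spelling did run correctly, but it read as if the list itself had a `.result()` method. Side by side:

```python
# Before: legal but confusing; `futures` names both the list and each element.
assert all(futures.result()["code"] == 0 for futures in futures)

# After: `future` is unambiguously a single Future taken from the list.
assert all(future.result()["code"] == 0 for future in futures)
```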
Dataset creation test (`TestCapability`):

```diff
@@ -85,7 +85,7 @@ class TestCapability:
             futures = [executor.submit(create_dataset, api_key, {"name": f"dataset_{i}"}) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
 
 @pytest.mark.usefixtures("clear_datasets")
```
Dataset deletion test:

```diff
@@ -93,7 +93,7 @@ class TestCapability:
             futures = [executor.submit(delete_datasets, api_key, {"ids": ids[i : i + 1]}) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
 
 class TestDatasetsDelete:
```
Dataset listing test:

```diff
@@ -49,7 +49,7 @@ class TestCapability:
             futures = [executor.submit(list_datasets, api_key) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
 
 @pytest.mark.usefixtures("add_datasets")
```
Dataset update test:

```diff
@@ -95,7 +95,7 @@ class TestCapability:
             futures = [executor.submit(update_dataset, api_key, dataset_id, {"name": f"dataset_{i}"}) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
 
 class TestDatasetUpdate:
```
Document deletion tests:

```diff
@@ -15,7 +15,6 @@
 #
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
-
 import pytest
 from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documents, list_documents
 from libs.auth import RAGFlowHttpApiAuth
@@ -165,7 +164,7 @@ def test_concurrent_deletion(api_key, add_dataset, tmp_path):
         ]
     responses = list(as_completed(futures))
     assert len(responses) == count, responses
-    assert all(futures.result()["code"] == 0 for futures in futures)
+    assert all(future.result()["code"] == 0 for future in futures)
 
 
 @pytest.mark.p3
```
Document listing tests (`TestDocumentsList`):

```diff
@@ -348,7 +348,7 @@ class TestDocumentsList:
             futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
     @pytest.mark.p3
     def test_invalid_params(self, api_key, add_documents):
```
Document parsing test:

```diff
@@ -211,7 +211,7 @@ def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
         ]
     responses = list(as_completed(futures))
     assert len(responses) == count, responses
-    assert all(futures.result()["code"] == 0 for futures in futures)
+    assert all(future.result()["code"] == 0 for future in futures)
 
     condition(api_key, dataset_id, count)
 
```
Document upload tests (`TestDocumentsUpload`):

```diff
@@ -213,7 +213,7 @@ class TestDocumentsUpload:
             futures = [executor.submit(upload_documents, api_key, dataset_id, fps[i : i + 1]) for i in range(count)]
         responses = list(as_completed(futures))
         assert len(responses) == count, responses
-        assert all(futures.result()["code"] == 0 for futures in futures)
+        assert all(future.result()["code"] == 0 for future in futures)
 
         res = list_datasets(api_key, {"id": dataset_id})
         assert res["data"][0]["document_count"] == count
```