Test: Refactor test concurrency handling and add SDK chunk management tests (#8112)

### What problem does this PR solve?

- Improve concurrent test cases by collecting futures with as_completed,
for better reliability
- Rename variables for clarity (chunk_num / chunks_num -> count)
- Add new SDK API test suite for chunk management operations
- Update HTTP API tests with consistent concurrency patterns

### Type of change

- [x] Add test cases
- [x] Refactoring
This commit is contained in:
Liu An
2025-06-06 19:43:14 +08:00
committed by GitHub
parent 157cd8b1b0
commit 5825a24d26
21 changed files with 946 additions and 55 deletions

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import INVALID_API_TOKEN, add_chunk, delete_documents, list_chunks
@ -224,7 +224,7 @@ class TestAddChunk:
@pytest.mark.skip(reason="issues/6411")
def test_concurrent_add_chunk(self, api_key, add_document):
chunk_num = 50
count = 50
dataset_id, document_id = add_document
res = list_chunks(api_key, dataset_id, document_id)
if res["code"] != 0:
@ -240,11 +240,12 @@ class TestAddChunk:
document_id,
{"content": f"chunk test {i}"},
)
for i in range(chunk_num)
for i in range(count)
]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures)
res = list_chunks(api_key, dataset_id, document_id)
if res["code"] != 0:
assert False, res
assert res["data"]["doc"]["chunk_count"] == chunks_count + chunk_num
assert res["data"]["doc"]["chunk_count"] == chunks_count + count

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import INVALID_API_TOKEN, batch_add_chunks, delete_chunks, list_chunks
@ -121,9 +121,9 @@ class TestChunksDeletion:
@pytest.mark.p3
def test_concurrent_deletion(self, api_key, add_document):
chunks_num = 100
count = 100
dataset_id, document_id = add_document
chunk_ids = batch_add_chunks(api_key, dataset_id, document_id, chunks_num)
chunk_ids = batch_add_chunks(api_key, dataset_id, document_id, count)
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
@ -134,10 +134,11 @@ class TestChunksDeletion:
document_id,
{"chunk_ids": chunk_ids[i : i + 1]},
)
for i in range(chunks_num)
for i in range(count)
]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.p3
def test_delete_1k(self, api_key, add_document):

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import INVALID_API_TOKEN, batch_add_chunks, list_chunks
@ -149,12 +149,12 @@ class TestChunksList:
@pytest.mark.p3
def test_concurrent_list(self, api_key, add_chunks):
dataset_id, document_id, _ = add_chunks
count = 100
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(list_chunks, api_key, dataset_id, document_id) for i in range(100)]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
assert all(len(r["data"]["chunks"]) == 5 for r in responses)
futures = [executor.submit(list_chunks, api_key, dataset_id, document_id) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(len(future.result()["data"]["chunks"]) == 5 for future in futures)
@pytest.mark.p1
def test_default(self, api_key, add_document):

View File

@ -14,6 +14,7 @@
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import (
@ -302,12 +303,12 @@ class TestChunksRetrieval:
@pytest.mark.p3
def test_concurrent_retrieval(self, api_key, add_chunks):
from concurrent.futures import ThreadPoolExecutor
dataset_id, _, _ = add_chunks
count = 100
payload = {"question": "chunk", "dataset_ids": [dataset_id]}
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(retrieval_chunks, api_key, payload) for i in range(100)]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
futures = [executor.submit(retrieval_chunks, api_key, payload) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures)

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
from random import randint
import pytest
@ -219,7 +219,7 @@ class TestUpdatedChunk:
@pytest.mark.p3
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6554")
def test_concurrent_update_chunk(self, api_key, add_chunks):
chunk_num = 50
count = 50
dataset_id, document_id, chunk_ids = add_chunks
with ThreadPoolExecutor(max_workers=5) as executor:
@ -232,10 +232,11 @@ class TestUpdatedChunk:
chunk_ids[randint(0, 3)],
{"content": f"update chunk test {i}"},
)
for i in range(chunk_num)
for i in range(count)
]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.p3
def test_update_chunk_to_deleted_document(self, api_key, add_chunks):

View File

@ -85,7 +85,7 @@ class TestCapability:
futures = [executor.submit(create_dataset, api_key, {"name": f"dataset_{i}"}) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.usefixtures("clear_datasets")

View File

@ -93,7 +93,7 @@ class TestCapability:
futures = [executor.submit(delete_datasets, api_key, {"ids": ids[i : i + 1]}) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
class TestDatasetsDelete:

View File

@ -49,7 +49,7 @@ class TestCapability:
futures = [executor.submit(list_datasets, api_key) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.usefixtures("add_datasets")

View File

@ -95,7 +95,7 @@ class TestCapability:
futures = [executor.submit(update_dataset, api_key, dataset_id, {"name": f"dataset_{i}"}) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
class TestDatasetUpdate:

View File

@ -15,7 +15,6 @@
#
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documents, list_documents
from libs.auth import RAGFlowHttpApiAuth
@ -165,7 +164,7 @@ def test_concurrent_deletion(api_key, add_dataset, tmp_path):
]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.p3

View File

@ -348,7 +348,7 @@ class TestDocumentsList:
futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
@pytest.mark.p3
def test_invalid_params(self, api_key, add_documents):

View File

@ -211,7 +211,7 @@ def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
condition(api_key, dataset_id, count)

View File

@ -213,7 +213,7 @@ class TestDocumentsUpload:
futures = [executor.submit(upload_documents, api_key, dataset_id, fps[i : i + 1]) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(futures.result()["code"] == 0 for futures in futures)
assert all(future.result()["code"] == 0 for future in futures)
res = list_datasets(api_key, {"id": dataset_id})
assert res["data"][0]["document_count"] == count