diff --git a/sdk/python/test/test_http_api/common.py b/sdk/python/test/test_http_api/common.py
index 834a470d2..dd33ac9a3 100644
--- a/sdk/python/test/test_http_api/common.py
+++ b/sdk/python/test/test_http_api/common.py
@@ -174,8 +174,6 @@ def stop_parse_documnet(auth, dataset_id, payload=None):
 
 # CHUNK MANAGEMENT WITHIN DATASET
 def add_chunk(auth, dataset_id, document_id, payload=None):
-    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(
-        dataset_id=dataset_id, document_id=document_id
-    )
+    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
     res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
     return res.json()
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_dataset.py
index 8327b737c..78acca5cf 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_dataset.py
@@ -38,9 +38,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         res = delete_dataset(auth, {"ids": ids})
         assert res["code"] == expected_code
@@ -73,9 +71,7 @@ class TestDatasetDeletion:
             (lambda r: {"ids": r}, 0, "", 0),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, payload, expected_code, expected_message, remaining
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, payload, expected_code, expected_message, remaining):
         ids = create_datasets(get_http_api_auth, 3)
         if callable(payload):
             payload = payload(ids)
@@ -120,7 +116,8 @@ class TestDatasetDeletion:
         ids = create_datasets(get_http_api_auth, 1)
         res = delete_dataset(get_http_api_auth, {"ids": ids + ids})
         assert res["code"] == 0
-        #assert res["data"]["success_count"] == 1
+        assert res["data"]["errors"][0] == f"Duplicate dataset ids: {ids[0]}"
+        assert res["data"]["success_count"] == 1
 
         res = list_dataset(get_http_api_auth)
         assert len(res["data"]) == 0
@@ -129,12 +126,7 @@ class TestDatasetDeletion:
         ids = create_datasets(get_http_api_auth, 100)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [
-                executor.submit(
-                    delete_dataset, get_http_api_auth, {"ids": ids[i : i + 1]}
-                )
-                for i in range(100)
-            ]
+            futures = [executor.submit(delete_dataset, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
         responses = [f.result() for f in futures]
 
         assert all(r["code"] == 0 for r in responses)
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_delete_documents.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_delete_documents.py
index 3be7de82e..858f5e006 100644
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_delete_documents.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_delete_documents.py
@@ -38,9 +38,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, tmp_path, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, tmp_path, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
         res = delete_documnet(auth, ids[0], {"ids": document_ids[0]})
@@ -54,11 +52,11 @@ class TestDocumentDeletion:
         [
             (None, 0, "", 0),
             ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "Document not found!", 3),
+            ({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
             (
                 {"ids": ["\n!?。;!?\"'"]},
                 102,
-                "Document not found!",
+                """Documents not found: [\'\\n!?。;!?"\\\'\']""",
                 3,
             ),
             (
@@ -86,8 +84,8 @@ class TestDocumentDeletion:
             payload = payload(document_ids)
         res = delete_documnet(get_http_api_auth, ids[0], payload)
         assert res["code"] == expected_code
-        #if res["code"] != 0:
-        #    assert res["message"] == expected_message
+        if res["code"] != 0:
+            assert res["message"] == expected_message
 
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == remaining
@@ -104,16 +102,14 @@ class TestDocumentDeletion:
             ),
         ],
     )
-    def test_invalid_dataset_id(
-        self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message
-    ):
+    def test_invalid_dataset_id(self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path)
         res = delete_documnet(get_http_api_auth, dataset_id, {"ids": document_ids[:1]})
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
-    @pytest.mark.xfail(reason="issues/6174")
+    # @pytest.mark.xfail(reason="issues/6174")
     @pytest.mark.parametrize(
         "payload",
         [
@@ -128,9 +124,8 @@ class TestDocumentDeletion:
         if callable(payload):
             payload = payload(document_ids)
         res = delete_documnet(get_http_api_auth, ids[0], payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
+        assert res["code"] == 102
+        assert res["message"] == "Documents not found: ['invalid_id']"
 
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == 0
@@ -143,17 +138,16 @@ class TestDocumentDeletion:
         assert res["code"] == 0
 
         res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids})
-        assert res["code"] in [102, 500]
-        #assert res["message"] == "Document not found!"
+        assert res["code"] == 102
+        assert res["message"] == f"Documents not found: {document_ids}"
 
-    @pytest.mark.xfail(reason="issues/6234")
     def test_duplicate_deletion(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = delete_documnet(
-            get_http_api_auth, ids[0], {"ids": document_ids + document_ids}
-        )
+        res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids + document_ids})
         assert res["code"] == 0
+        assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}"
+        assert res["data"]["success_count"] == 1
 
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == 0
@@ -162,9 +156,7 @@ class TestDocumentDeletion:
     def test_concurrent_deletion(self, get_http_api_auth, tmp_path):
         documnets_num = 100
         ids = create_datasets(get_http_api_auth, 1)
-        document_ids = batch_upload_documents(
-            get_http_api_auth, ids[0], documnets_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
             futures = [
@@ -183,9 +175,7 @@ class TestDocumentDeletion:
     def test_delete_1k(self, get_http_api_auth, tmp_path):
         documnets_num = 1_000
         ids = create_datasets(get_http_api_auth, 1)
-        document_ids = batch_upload_documents(
-            get_http_api_auth, ids[0], documnets_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path)
         res = list_documnet(get_http_api_auth, ids[0])
         assert res["data"]["total"] == documnets_num
 
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
index f96a06613..728f6a222 100644
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
@@ -50,9 +50,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, tmp_path, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, tmp_path, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
         res = parse_documnet(auth, ids[0], {"document_ids": document_ids[0]})
@@ -68,32 +66,30 @@ class TestDocumentsParse:
                 None,
                 102,
                 """AttributeError("\'NoneType\' object has no attribute \'get\'")""",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             ({"document_ids": []}, 102, "`document_ids` is required"),
             (
                 {"document_ids": ["invalid_id"]},
                 102,
-                "You don't own the document invalid_id.",
+                "Documents not found: ['invalid_id']",
             ),
             (
                 {"document_ids": ["\n!?。;!?\"'"]},
                 102,
-                """You don\'t own the document \n!?。;!?"\'.""",
+                """Documents not found: [\'\\n!?。;!?"\\\'\']""",
             ),
             pytest.param(
                 "not json",
                 102,
                 "AttributeError(\"'str' object has no attribute 'get'\")",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             (lambda r: {"document_ids": r[:1]}, 0, ""),
             (lambda r: {"document_ids": r}, 0, ""),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, tmp_path, payload, expected_code, expected_message
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id, _document_ids):
             for _document_id in _document_ids:
@@ -104,20 +100,16 @@ class TestDocumentsParse:
 
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         if callable(payload):
             payload = payload(document_ids)
         res = parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == expected_code
-        #if expected_code != 0:
-        #    assert res["message"] == expected_message
+        if expected_code != 0:
+            assert res["message"] == expected_message
         if expected_code == 0:
             condition(get_http_api_auth, dataset_id, payload["document_ids"])
-            validate_document_details(
-                get_http_api_auth, dataset_id, payload["document_ids"]
-            )
+            validate_document_details(get_http_api_auth, dataset_id, payload["document_ids"])
 
     @pytest.mark.parametrize(
         "dataset_id, expected_code, expected_message",
@@ -140,13 +132,10 @@ class TestDocumentsParse:
     ):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
-    @pytest.mark.skip(reason="issues/6229")
     @pytest.mark.parametrize(
         "payload",
         [
@@ -155,9 +144,7 @@ class TestDocumentsParse:
             lambda r: {"document_ids": r + ["invalid_id"]},
         ],
    )
-    def test_parse_partial_invalid_document_id(
-        self, get_http_api_auth, tmp_path, payload
-    ):
+    def test_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id):
             res = list_documnet(_auth, _dataset_id)
@@ -168,14 +155,12 @@ class TestDocumentsParse:
 
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         if callable(payload):
             payload = payload(document_ids)
         res = parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == 102
-        assert res["message"] == "You don't own the document invalid_id."
+        assert res["message"] == "Documents not found: ['invalid_id']"
 
         condition(get_http_api_auth, dataset_id)
 
@@ -192,22 +177,15 @@ class TestDocumentsParse:
 
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
 
         condition(get_http_api_auth, dataset_id)
 
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
 
-    @pytest.mark.skip(reason="issues/6234")
     def test_duplicate_parse(self, get_http_api_auth, tmp_path):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id):
@@ -219,13 +197,11 @@ class TestDocumentsParse:
 
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
         assert res["code"] == 0
+        assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}"
+        assert res["data"]["success_count"] == 1
 
         condition(get_http_api_auth, dataset_id)
 
@@ -244,12 +220,8 @@ class TestDocumentsParse:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
 
         condition(get_http_api_auth, dataset_id, document_num)
@@ -269,9 +241,7 @@ class TestDocumentsParse:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
             futures = [
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_stop_parse_documents.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_stop_parse_documents.py
index 3710c4697..9f8278729 100644
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_stop_parse_documents.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_stop_parse_documents.py
@@ -60,9 +60,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         res = stop_parse_documnet(auth, ids[0])
         assert res["code"] == expected_code
@@ -78,7 +76,7 @@ class TestDocumentsParseStop:
                 None,
                 102,
                 """AttributeError("\'NoneType\' object has no attribute \'get\'")""",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             ({"document_ids": []}, 102, "`document_ids` is required"),
             (
@@ -95,15 +93,13 @@ class TestDocumentsParseStop:
                 "not json",
                 102,
                 "AttributeError(\"'str' object has no attribute 'get'\")",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             (lambda r: {"document_ids": r[:1]}, 0, ""),
             (lambda r: {"document_ids": r}, 0, ""),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, tmp_path, payload, expected_code, expected_message
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id, _document_ids):
             for _document_id in _document_ids:
@@ -114,9 +110,7 @@ class TestDocumentsParseStop:
 
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
 
         if callable(payload):
@@ -127,16 +121,10 @@ class TestDocumentsParseStop:
         if expected_code != 0:
             assert res["message"] == expected_message
         else:
-            completed_document_ids = list(
-                set(document_ids) - set(payload["document_ids"])
-            )
+            completed_document_ids = list(set(document_ids) - set(payload["document_ids"]))
             condition(get_http_api_auth, dataset_id, completed_document_ids)
-            validate_document_parse_cancel(
-                get_http_api_auth, dataset_id, payload["document_ids"]
-            )
-            validate_document_parse_done(
-                get_http_api_auth, dataset_id, completed_document_ids
-            )
+            validate_document_parse_cancel(get_http_api_auth, dataset_id, payload["document_ids"])
+            validate_document_parse_done(get_http_api_auth, dataset_id, completed_document_ids)
 
     @pytest.mark.parametrize(
         "dataset_id, expected_code, expected_message",
@@ -159,13 +147,11 @@ class TestDocumentsParseStop:
     ):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
-    @pytest.mark.xfail
+    @pytest.mark.skip
     @pytest.mark.parametrize(
         "payload",
         [
@@ -174,54 +160,40 @@ class TestDocumentsParseStop:
             lambda r: {"document_ids": r + ["invalid_id"]},
         ],
     )
-    def test_stop_parse_partial_invalid_document_id(
-        self, get_http_api_auth, tmp_path, payload
-    ):
+    def test_stop_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
 
         if callable(payload):
             payload = payload(document_ids)
         res = stop_parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == 102
+        assert res["message"] == "You don't own the document invalid_id."
 
         validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)
 
     def test_repeated_stop_parse(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
 
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 102
         assert res["message"] == "Can't stop parsing document with progress at 0 or 1"
 
-    @pytest.mark.xfail
     def test_duplicate_stop_parse(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
         assert res["code"] == 0
-        assert res["success_count"] == 1
+        assert res["data"]["success_count"] == 1
         assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"]
 
     @pytest.mark.slow
@@ -229,13 +201,9 @@ class TestDocumentsParseStop:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
         validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)
 
@@ -244,9 +212,7 @@ class TestDocumentsParseStop:
         document_num = 50
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
 
         with ThreadPoolExecutor(max_workers=5) as executor: