From 1d9ca172e3b70271ad4f021d76a044d435f3b1f4 Mon Sep 17 00:00:00 2001
From: hy89 <31279043+hy89@users.noreply.github.com>
Date: Thu, 20 Mar 2025 16:00:17 +0800
Subject: [PATCH] Fix(api): correct document parsing progress check logic (#6318)

- Fix incorrect progress check condition that prevented re-parsing of completed documents
- Allow parsing for documents with progress 0.0 (not started) or 1.0 (completed)
- Only block parsing for documents currently in progress (0.0 < progress < 1.0)

Close #6312

---------

Co-authored-by: Kevin Hu
---
 api/apps/sdk/doc.py | 4 ++--
 .../test_parse_documents.py | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index 25c78d520..65271dba4 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -688,9 +688,9 @@ def parse(tenant_id, dataset_id):
             continue
         if not doc:
             return get_error_data_result(message=f"You don't own the document {id}.")
-        if doc[0].progress != 0.0:
+        if 0.0 < doc[0].progress < 1.0:
             return get_error_data_result(
-                "Can't stop parsing document with progress at 0 or 100"
+                "Can't parse document that is currently being processed"
             )
         info = {"run": "1", "progress": 0, "progress_msg": "", "chunk_num": 0, "token_num": 0}
         DocumentService.update_by_id(id, info)
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
index d4a5ff0a7..f96a06613 100644
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_parse_documents.py
@@ -205,8 +205,7 @@ class TestDocumentsParse:
         res = parse_documnet(
             get_http_api_auth, dataset_id, {"document_ids": document_ids}
         )
-        assert res["code"] == 102
-        assert res["message"] == "Can't stop parsing document with progress at 0 or 100"
+        assert res["code"] == 0
 
     @pytest.mark.skip(reason="issues/6234")
     def test_duplicate_parse(self, get_http_api_auth, tmp_path):