From d7681302043c50c478f411a580c553e7d14d7467 Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Thu, 26 Jun 2025 17:46:53 +0800
Subject: [PATCH] Fix: chunk number error after re-parsing (#8513)

### What problem does this PR solve?

Fix chunk number error after re-parsing. #8503.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/apps/document_app.py            |  7 +++++++
 api/db/services/document_service.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 68a76394f..90d62b0b3 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -365,6 +365,13 @@ def run():
                 info["progress_msg"] = ""
                 info["chunk_num"] = 0
                 info["token_num"] = 0
+
+            e, doc = DocumentService.get_by_id(id)
+            if not e:
+                return get_data_error_result(message="Document not found!")
+            if doc.run == TaskStatus.DONE.value:
+                DocumentService.clear_chunk_num_when_rerun(doc.id)
+
             DocumentService.update_by_id(id, info)
             tenant_id = DocumentService.get_tenant_id(id)
             if not tenant_id:
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 8b7bc6660..c69f75aca 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -279,6 +279,42 @@ class DocumentService(CommonService):
             Knowledgebase.id == doc.kb_id).execute()
         return num
 
+
+    @classmethod
+    @DB.connection_context()
+    def clear_chunk_num_when_rerun(cls, doc_id):
+        """Subtract a document's token and chunk counts from its knowledge base.
+
+        Called before an already-parsed document is parsed again, so the
+        totals it contributed on the previous run are not counted twice.
+
+        Args:
+            doc_id: Primary key of the document about to be re-run.
+
+        Returns:
+            The result of executing the UPDATE on ``Knowledgebase``
+            (presumably the number of rows modified; confirm against peewee).
+
+        Raises:
+            AssertionError: If the lookup yields no document.
+        """
+        doc = cls.model.get_by_id(doc_id)
+        # Raise explicitly rather than via `assert`, which is stripped under
+        # `python -O`; the exception type stays the same for existing callers.
+        if not doc:
+            raise AssertionError("Can't find document in database.")
+
+        num = (
+            Knowledgebase.update(
+                token_num=Knowledgebase.token_num - doc.token_num,
+                chunk_num=Knowledgebase.chunk_num - doc.chunk_num,
+            )
+            .where(Knowledgebase.id == doc.kb_id)
+            .execute()
+        )
+        return num
+
+
     @classmethod
     @DB.connection_context()
     def get_tenant_id(cls, doc_id):