From 08c01b76d5e63b349590ca1d9ba00fed75e29416 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 23 Jan 2026 12:54:08 +0800 Subject: [PATCH] Fix: missing parent chunk issue. (#12789) ### What problem does this PR solve? Close #12783 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/component/categorize.py | 2 +- api/apps/sdk/dify_retrieval.py | 1 + api/apps/sdk/doc.py | 1 + api/db/services/document_service.py | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/agent/component/categorize.py b/agent/component/categorize.py index 0804ca64a..b5a6a4b9c 100644 --- a/agent/component/categorize.py +++ b/agent/component/categorize.py @@ -147,7 +147,7 @@ class Categorize(LLM, ABC): category_counts[c] = count cpn_ids = list(self._param.category_description.items())[-1][1]["to"] - max_category = list(self._param.category_description.keys())[0] + max_category = list(self._param.category_description.keys())[-1] if any(category_counts.values()): max_category = max(category_counts.items(), key=lambda x: x[1])[0] cpn_ids = self._param.category_description[max_category]["to"] diff --git a/api/apps/sdk/dify_retrieval.py b/api/apps/sdk/dify_retrieval.py index 0841bf7bd..64752fee4 100644 --- a/api/apps/sdk/dify_retrieval.py +++ b/api/apps/sdk/dify_retrieval.py @@ -148,6 +148,7 @@ async def retrieval(tenant_id): doc_ids=doc_ids, rank_feature=label_question(question, [kb]) ) + ranks["chunks"] = settings.retriever.retrieval_by_children(ranks["chunks"], [tenant_id]) if use_kg: ck = await settings.kg_retriever.retrieval(question, diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 0973d98ee..2e97c1668 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1579,6 +1579,7 @@ async def retrieval_test(tenant_id): cks = await settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size) if cks: ranks["chunks"] = cks + ranks["chunks"] = settings.retriever.retrieval_by_children(ranks["chunks"], tenant_ids) if use_kg: ck = await settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT)) if ck["content_with_weight"]: diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 018a24d89..7035948ba 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -786,6 +786,8 @@ class DocumentService(CommonService): return "string" if isinstance(value, (int, float)): return "number" + if re.match(r"\d{4}\-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", str(value)): + return "time" return "string" fields = [cls.model.id, cls.model.meta_fields]