From 4b1b68c5fc7e52de3bcc544a01c456c24482626d Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Wed, 13 Aug 2025 12:43:31 +0800 Subject: [PATCH] Fix: no doc hits after meta data filter. (#9435) ### What problem does this PR solve? ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/db/services/dialog_service.py | 11 +++++++---- rag/prompts/citation_prompt.md | 3 ++- rag/utils/s3_conn.py | 1 - 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 62372c7c3..c2748589b 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -365,8 +365,12 @@ def chat(dialog, messages, stream=True, **kwargs): if dialog.meta_data_filter.get("method") == "auto": filters = gen_meta_filter(chat_mdl, metas, questions[-1]) attachments.extend(meta_filter(metas, filters)) + if not attachments: + attachments = None elif dialog.meta_data_filter.get("method") == "manual": attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"])) + if not attachments: + attachments = None if prompt_config.get("keyword", False): questions[-1] += keyword_extraction(chat_mdl, questions[-1]) @@ -375,17 +379,16 @@ def chat(dialog, messages, stream=True, **kwargs): thought = "" kbinfos = {"total": 0, "chunks": [], "doc_aggs": []} + knowledges = [] - if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]: - knowledges = [] - else: + if attachments is not None and "knowledge" in [p["key"] for p in prompt_config["parameters"]]: tenant_ids = list(set([kb.tenant_id for kb in kbs])) knowledges = [] if prompt_config.get("reasoning", False): reasoner = DeepResearcher( chat_mdl, prompt_config, - partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3), + partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, 
kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3, doc_ids=attachments), ) for think in reasoner.thinking(kbinfos, " ".join(questions)): diff --git a/rag/prompts/citation_prompt.md b/rag/prompts/citation_prompt.md index 53928b991..55c89c454 100644 --- a/rag/prompts/citation_prompt.md +++ b/rag/prompts/citation_prompt.md @@ -105,4 +105,5 @@ REMEMBER: - Cite FACTS, not opinions or transitions - Each citation supports the ENTIRE sentence - When in doubt, ask: "Would a fact-checker need to verify this?" -- Place citations at sentence end, before punctuation \ No newline at end of file +- Place citations at sentence end, before punctuation +- Format like this is FORBIDDEN: [ID:0, ID:5, ID:...]. It MUST be separated like, [ID:0][ID:5]... diff --git a/rag/utils/s3_conn.py b/rag/utils/s3_conn.py index 038e47135..74049c7c1 100644 --- a/rag/utils/s3_conn.py +++ b/rag/utils/s3_conn.py @@ -191,7 +191,6 @@ class RAGFlowS3: time.sleep(1) return - @use_prefix_path @use_default_bucket def rm_bucket(self, bucket, *args, **kwargs): for conn in self.conn: