From 4b1b68c5fc7e52de3bcc544a01c456c24482626d Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Wed, 13 Aug 2025 12:43:31 +0800
Subject: [PATCH] Fix: no doc hits after meta data filter. (#9435)

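### What problem does this PR solve?

When the metadata filter (`auto` or `manual`) matches no documents, `attachments` is now set to `None` and the knowledge-retrieval block in `chat()` is skipped. The reasoning retriever also receives `doc_ids=attachments`. In addition, this adds a citation-format rule to the citation prompt and removes the `@use_prefix_path` decorator from `RAGFlowS3.rm_bucket`.
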
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/db/services/dialog_service.py | 11 +++++++----
 rag/prompts/citation_prompt.md    |  3 ++-
 rag/utils/s3_conn.py              |  1 -
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index 62372c7c3..c2748589b 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -365,8 +365,12 @@ def chat(dialog, messages, stream=True, **kwargs):
         if dialog.meta_data_filter.get("method") == "auto":
             filters = gen_meta_filter(chat_mdl, metas, questions[-1])
             attachments.extend(meta_filter(metas, filters))
+            if not attachments:
+                attachments = None
         elif dialog.meta_data_filter.get("method") == "manual":
             attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"]))
+            if not attachments:
+                attachments = None
 
     if prompt_config.get("keyword", False):
         questions[-1] += keyword_extraction(chat_mdl, questions[-1])
@@ -375,17 +379,16 @@ def chat(dialog, messages, stream=True, **kwargs):
 
     thought = ""
     kbinfos = {"total": 0, "chunks": [], "doc_aggs": []}
+    knowledges = []
 
-    if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
-        knowledges = []
-    else:
+    if attachments is not None and "knowledge" in [p["key"] for p in prompt_config["parameters"]]:
         tenant_ids = list(set([kb.tenant_id for kb in kbs]))
         knowledges = []
         if prompt_config.get("reasoning", False):
             reasoner = DeepResearcher(
                 chat_mdl,
                 prompt_config,
-                partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3),
+                partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3, doc_ids=attachments),
             )
 
             for think in reasoner.thinking(kbinfos, " ".join(questions)):
diff --git a/rag/prompts/citation_prompt.md b/rag/prompts/citation_prompt.md
index 53928b991..55c89c454 100644
--- a/rag/prompts/citation_prompt.md
+++ b/rag/prompts/citation_prompt.md
@@ -105,4 +105,5 @@ REMEMBER:
 - Cite FACTS, not opinions or transitions
 - Each citation supports the ENTIRE sentence
 - When in doubt, ask: "Would a fact-checker need to verify this?"
-- Place citations at sentence end, before punctuation
\ No newline at end of file
+- Place citations at sentence end, before punctuation
+- Formats like this are FORBIDDEN: [ID:0, ID:5, ID:...]. Citations MUST be separated like this: [ID:0][ID:5]...
diff --git a/rag/utils/s3_conn.py b/rag/utils/s3_conn.py
index 038e47135..74049c7c1 100644
--- a/rag/utils/s3_conn.py
+++ b/rag/utils/s3_conn.py
@@ -191,7 +191,6 @@ class RAGFlowS3:
                 time.sleep(1)
         return
 
-    @use_prefix_path
     @use_default_bucket
     def rm_bucket(self, bucket, *args, **kwargs):
         for conn in self.conn: