Feat: support tree structured deep-research policy. (#12559)

### What problem does this PR solve?

#12558
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
Kevin Hu
2026-01-13 09:41:35 +08:00
committed by GitHub
parent 867ec94258
commit 44bada64c9
15 changed files with 1166 additions and 1381 deletions

View File

@ -187,7 +187,6 @@ COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
COPY mcp mcp
COPY plugin plugin

View File

@ -1 +0,0 @@
from .deep_research import DeepResearcher as DeepResearcher

View File

@ -1,311 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from functools import partial
from agentic_reasoning.prompts import BEGIN_SEARCH_QUERY, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT, MAX_SEARCH_LIMIT, \
END_SEARCH_QUERY, REASON_PROMPT, RELEVANT_EXTRACTION_PROMPT
from api.db.services.llm_service import LLMBundle
from rag.nlp import extract_between
from rag.prompts import kb_prompt
from rag.utils.tavily_conn import Tavily
class DeepResearcher:
def __init__(self,
chat_mdl: LLMBundle,
prompt_config: dict,
kb_retrieve: partial = None,
kg_retrieve: partial = None
):
self.chat_mdl = chat_mdl
self.prompt_config = prompt_config
self._kb_retrieve = kb_retrieve
self._kg_retrieve = kg_retrieve
@staticmethod
def _remove_tags(text: str, start_tag: str, end_tag: str) -> str:
"""Remove tags but keep the content between them."""
if not text:
return text
text = re.sub(re.escape(start_tag), "", text)
return re.sub(re.escape(end_tag), "", text)
@staticmethod
def _remove_query_tags(text: str) -> str:
"""Remove Query Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
@staticmethod
def _remove_result_tags(text: str) -> str:
"""Remove Result Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT)
async def _generate_reasoning(self, msg_history):
"""Generate reasoning steps (delta output)"""
raw_answer = ""
cleaned_answer = ""
if msg_history[-1]["role"] != "user":
msg_history.append({"role": "user", "content": "Continues reasoning with the new information.\n"})
else:
msg_history[-1]["content"] += "\n\nContinues reasoning with the new information.\n"
async for delta in self.chat_mdl.async_chat_streamly_delta(REASON_PROMPT, msg_history, {"temperature": 0.7}):
if not delta:
continue
raw_answer += delta
cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
if not cleaned_full:
continue
if cleaned_full.startswith(cleaned_answer):
delta_clean = cleaned_full[len(cleaned_answer):]
else:
delta_clean = cleaned_full
if not delta_clean:
continue
cleaned_answer = cleaned_full
yield delta_clean
def _extract_search_queries(self, query_think, question, step_index):
"""Extract search queries from thinking"""
queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
if not queries and step_index == 0:
# If this is the first step and no queries are found, use the original question as the query
queries = [question]
return queries
def _truncate_previous_reasoning(self, all_reasoning_steps):
"""Truncate previous reasoning steps to maintain a reasonable length"""
truncated_prev_reasoning = ""
for i, step in enumerate(all_reasoning_steps):
truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n"
prev_steps = truncated_prev_reasoning.split('\n\n')
if len(prev_steps) <= 5:
truncated_prev_reasoning = '\n\n'.join(prev_steps)
else:
truncated_prev_reasoning = ''
for i, step in enumerate(prev_steps):
if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step:
truncated_prev_reasoning += step + '\n\n'
else:
if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n':
truncated_prev_reasoning += '...\n\n'
return truncated_prev_reasoning.strip('\n')
def _retrieve_information(self, search_query):
"""Retrieve information from different sources"""
# 1. Knowledge base retrieval
kbinfos = {"chunks": [], "doc_aggs": []}
try:
kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []}
except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}")
# 2. Web retrieval (if Tavily API is configured)
try:
if self.prompt_config.get("tavily_api_key"):
tav = Tavily(self.prompt_config["tavily_api_key"])
tav_res = tav.retrieve_chunks(search_query)
kbinfos["chunks"].extend(tav_res["chunks"])
kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
except Exception as e:
logging.error(f"Web retrieval error: {e}")
# 3. Knowledge graph retrieval (if configured)
try:
if self.prompt_config.get("use_kg") and self._kg_retrieve:
ck = self._kg_retrieve(question=search_query)
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
except Exception as e:
logging.error(f"Knowledge graph retrieval error: {e}")
return kbinfos
def _update_chunk_info(self, chunk_info, kbinfos):
"""Update chunk information for citations"""
if not chunk_info["chunks"]:
# If this is the first retrieval, use the retrieval results directly
for k in chunk_info.keys():
chunk_info[k] = kbinfos[k]
else:
# Merge newly retrieved information, avoiding duplicates
cids = [c["chunk_id"] for c in chunk_info["chunks"]]
for c in kbinfos["chunks"]:
if c["chunk_id"] not in cids:
chunk_info["chunks"].append(c)
dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
for d in kbinfos["doc_aggs"]:
if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d)
async def _extract_relevant_info(self, truncated_prev_reasoning, search_query, kbinfos):
"""Extract and summarize relevant information (delta output)"""
raw_answer = ""
cleaned_answer = ""
async for delta in self.chat_mdl.async_chat_streamly_delta(
RELEVANT_EXTRACTION_PROMPT.format(
prev_reasoning=truncated_prev_reasoning,
search_query=search_query,
document="\n".join(kb_prompt(kbinfos, 4096))
),
[{"role": "user",
"content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}],
{"temperature": 0.7}):
if not delta:
continue
raw_answer += delta
cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
if not cleaned_full:
continue
if cleaned_full.startswith(cleaned_answer):
delta_clean = cleaned_full[len(cleaned_answer):]
else:
delta_clean = cleaned_full
if not delta_clean:
continue
cleaned_answer = cleaned_full
yield delta_clean
async def thinking(self, chunk_info: dict, question: str):
executed_search_queries = []
msg_history = [{"role": "user", "content": f'Question:\"{question}\"\n'}]
all_reasoning_steps = []
think = "<think>"
last_idx = 0
endswith_think = False
last_full = ""
def emit_delta(full_text: str):
nonlocal last_idx, endswith_think, last_full
if full_text == last_full:
return None
last_full = full_text
delta_ans = full_text[last_idx:]
if delta_ans.find("<think>") == 0:
last_idx += len("<think>")
delta = "<think>"
elif delta_ans.find("<think>") > 0:
delta = full_text[last_idx:last_idx + delta_ans.find("<think>")]
last_idx += delta_ans.find("<think>")
elif delta_ans.endswith("</think>"):
endswith_think = True
delta = re.sub(r"(<think>|</think>)", "", delta_ans)
elif endswith_think:
endswith_think = False
delta = "</think>"
else:
last_idx = len(full_text)
if full_text.endswith("</think>"):
last_idx -= len("</think>")
delta = re.sub(r"(<think>|</think>)", "", delta_ans)
if not delta:
return None
if delta == "<think>":
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
if delta == "</think>":
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
return {"answer": delta, "reference": {}, "audio_binary": None, "final": False}
def flush_think_close():
nonlocal endswith_think
if endswith_think:
endswith_think = False
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
return None
for step_index in range(MAX_SEARCH_LIMIT + 1):
# Check if the maximum search limit has been reached
if step_index == MAX_SEARCH_LIMIT - 1:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n"
payload = emit_delta(think + summary_think)
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "assistant", "content": summary_think})
break
# Step 1: Generate reasoning
query_think = ""
async for delta in self._generate_reasoning(msg_history):
query_think += delta
payload = emit_delta(think + self._remove_query_tags(query_think))
if payload:
yield payload
think += self._remove_query_tags(query_think)
all_reasoning_steps.append(query_think)
# Step 2: Extract search queries
queries = self._extract_search_queries(query_think, question, step_index)
if not queries and step_index > 0:
# If not the first step and no queries, end the search process
break
# Process each search query
for search_query in queries:
msg_history.append({"role": "assistant", "content": search_query})
think += f"\n\n> {step_index + 1}. {search_query}\n\n"
payload = emit_delta(think)
if payload:
yield payload
# Check if the query has already been executed
if search_query in executed_search_queries:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n"
payload = emit_delta(think + summary_think)
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "user", "content": summary_think})
think += summary_think
continue
executed_search_queries.append(search_query)
# Step 3: Truncate previous reasoning steps
truncated_prev_reasoning = self._truncate_previous_reasoning(all_reasoning_steps)
# Step 4: Retrieve information
kbinfos = self._retrieve_information(search_query)
# Step 5: Update chunk information
self._update_chunk_info(chunk_info, kbinfos)
# Step 6: Extract relevant information
think += "\n\n"
summary_think = ""
async for delta in self._extract_relevant_info(truncated_prev_reasoning, search_query, kbinfos):
summary_think += delta
payload = emit_delta(think + self._remove_result_tags(summary_think))
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append(
{"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"})
think += self._remove_result_tags(summary_think)
final_payload = emit_delta(think + "</think>")
if final_payload:
yield final_payload
close_payload = flush_think_close()
if close_payload:
yield close_payload

View File

@ -1,147 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
END_SEARCH_QUERY = "<|end_search_query|>"
BEGIN_SEARCH_RESULT = "<|begin_search_result|>"
END_SEARCH_RESULT = "<|end_search_result|>"
MAX_SEARCH_LIMIT = 6
REASON_PROMPT = f"""You are an advanced reasoning agent. Your goal is to answer the user's question by breaking it down into a series of verifiable steps.
You have access to a powerful search tool to find information.
**Your Task:**
1. Analyze the user's question.
2. If you need information, issue a search query to find a specific fact.
3. Review the search results.
4. Repeat the search process until you have all the facts needed to answer the question.
5. Once you have gathered sufficient information, synthesize the facts and provide the final answer directly.
**Tool Usage:**
- To search, you MUST write your query between the special tokens: {BEGIN_SEARCH_QUERY}your query{END_SEARCH_QUERY}.
- The system will provide results between {BEGIN_SEARCH_RESULT}search results{END_SEARCH_RESULT}.
- You have a maximum of {MAX_SEARCH_LIMIT} search attempts.
---
**Example 1: Multi-hop Question**
**Question:** "Are both the directors of Jaws and Casino Royale from the same country?"
**Your Thought Process & Actions:**
First, I need to identify the director of Jaws.
{BEGIN_SEARCH_QUERY}who is the director of Jaws?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Jaws is a 1975 American thriller film directed by Steven Spielberg.
{END_SEARCH_RESULT}
Okay, the director of Jaws is Steven Spielberg. Now I need to find out his nationality.
{BEGIN_SEARCH_QUERY}where is Steven Spielberg from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Steven Allan Spielberg is an American filmmaker. Born in Cincinnati, Ohio...
{END_SEARCH_RESULT}
So, Steven Spielberg is from the USA. Next, I need to find the director of Casino Royale.
{BEGIN_SEARCH_QUERY}who is the director of Casino Royale 2006?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Casino Royale is a 2006 spy film directed by Martin Campbell.
{END_SEARCH_RESULT}
The director of Casino Royale is Martin Campbell. Now I need his nationality.
{BEGIN_SEARCH_QUERY}where is Martin Campbell from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Martin Campbell (born 24 October 1943) is a New Zealand film and television director.
{END_SEARCH_RESULT}
I have all the information. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand. They are not from the same country.
Final Answer: No, the directors of Jaws and Casino Royale are not from the same country. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand.
---
**Example 2: Simple Fact Retrieval**
**Question:** "When was the founder of craigslist born?"
**Your Thought Process & Actions:**
First, I need to know who founded craigslist.
{BEGIN_SEARCH_QUERY}who founded craigslist?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craigslist was founded in 1995 by Craig Newmark.
{END_SEARCH_RESULT}
The founder is Craig Newmark. Now I need his birth date.
{BEGIN_SEARCH_QUERY}when was Craig Newmark born?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craig Newmark was born on December 6, 1952.
{END_SEARCH_RESULT}
I have found the answer.
Final Answer: The founder of craigslist, Craig Newmark, was born on December 6, 1952.
---
**Important Rules:**
- **One Fact at a Time:** Decompose the problem and issue one search query at a time to find a single, specific piece of information.
- **Be Precise:** Formulate clear and precise search queries. If a search fails, rephrase it.
- **Synthesize at the End:** Do not provide the final answer until you have completed all necessary searches.
- **Language Consistency:** Your search queries should be in the same language as the user's question.
Now, begin your work. Please answer the following question by thinking step-by-step.
"""
RELEVANT_EXTRACTION_PROMPT = """You are a highly efficient information extraction module. Your sole purpose is to extract the single most relevant piece of information from the provided `Searched Web Pages` that directly answers the `Current Search Query`.
**Your Task:**
1. Read the `Current Search Query` to understand what specific information is needed.
2. Scan the `Searched Web Pages` to find the answer to that query.
3. Extract only the essential, factual information that answers the query. Be concise.
**Context (For Your Information Only):**
The `Previous Reasoning Steps` are provided to give you context on the overall goal, but your primary focus MUST be on answering the `Current Search Query`. Do not use information from the previous steps in your output.
**Output Format:**
Your response must follow one of two formats precisely.
1. **If a direct and relevant answer is found:**
- Start your response immediately with `Final Information`.
- Provide only the extracted fact(s). Do not add any extra conversational text.
*Example:*
`Current Search Query`: Where is Martin Campbell from?
`Searched Web Pages`: [Long article snippet about Martin Campbell's career, which includes the sentence "Martin Campbell (born 24 October 1943) is a New Zealand film and television director..."]
*Your Output:*
Final Information
Martin Campbell is a New Zealand film and television director.
2. **If no relevant answer that directly addresses the query is found in the web pages:**
- Start your response immediately with `Final Information`.
- Write the exact phrase: `No helpful information found.`
---
**BEGIN TASK**
**Inputs:**
- **Previous Reasoning Steps:**
{prev_reasoning}
- **Current Search Query:**
{search_query}
- **Searched Web Pages:**
{document}
"""

View File

@ -174,6 +174,7 @@ async def update_metadata_setting():
message="Database error (Knowledgebase rename)!")
kb = kb.to_dict()
kb["parser_config"]["metadata"] = req["metadata"]
kb["parser_config"]["enable_metadata"] = req.get("enable_metadata", True)
KnowledgebaseService.update_by_id(kb["id"], kb)
return get_json_result(data=kb)

View File

@ -64,6 +64,7 @@ class ConversationService(CommonService):
offset += limit
return res
def structure_answer(conv, ans, message_id, session_id):
reference = ans["reference"]
if not isinstance(reference, dict):
@ -107,6 +108,7 @@ def structure_answer(conv, ans, message_id, session_id):
conv.reference[-1] = reference
return ans
async def async_completion(tenant_id, chat_id, question, name="New session", session_id=None, stream=True, **kwargs):
assert name, "`name` can not be empty."
dia = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import binascii
import logging
import re
@ -23,7 +24,6 @@ from functools import partial
from timeit import default_timer as timer
from langfuse import Langfuse
from peewee import fn
from agentic_reasoning import DeepResearcher
from api.db.services.file_service import FileService
from common.constants import LLMType, ParserType, StatusEnum
from api.db.db_models import DB, Dialog
@ -36,6 +36,7 @@ from common.metadata_utils import apply_meta_data_filter
from api.db.services.tenant_llm_service import TenantLLMService
from common.time_utils import current_timestamp, datetime_format
from graphrag.general.mind_map_extractor import MindMapExtractor
from rag.advanced_rag import DeepResearcher
from rag.app.resume import forbidden_select_fields4resume
from rag.app.tag import label_question
from rag.nlp.search import index_name
@ -380,16 +381,35 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
doc_ids=attachments,
),
)
queue = asyncio.Queue()
async def callback(msg: str):
await queue.put(msg + "<br/>")
await callback("<START_DEEP_RESEARCH>")
task = asyncio.create_task(reasoner.research(kbinfos, questions[-1], questions[-1], callback=callback))
while True:
msg = await queue.get()
if msg.find("<START_DEEP_RESEARCH>") == 0:
yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
elif msg.find("<END_DEEP_RESEARCH>") == 0:
yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
break
else:
yield {"answer": msg, "reference": {}, "audio_binary": None, "final": False}
await task
'''
async for think in reasoner.thinking(kbinfos, attachments_ + " ".join(questions)):
if isinstance(think, str):
thought = think
knowledges = [t for t in think.split("\n") if t]
elif stream:
yield think
'''
else:
if embd_mdl:
kbinfos = retriever.retrieval(
kbinfos = await asyncio.to_thread(retriever.retrieval,
" ".join(questions),
embd_mdl,
tenant_ids,
@ -420,8 +440,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
knowledges = kb_prompt(kbinfos, max_tokens)
logging.debug("{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
retrieval_ts = timer()
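The deep-research branch in this file bridges the callback-driven `research()` coroutine to the streaming chat generator through an `asyncio.Queue`, using `<START_DEEP_RESEARCH>` / `<END_DEEP_RESEARCH>` as sentinel markers. A minimal, self-contained sketch of the same pattern follows; the names (`produce`, `stream`) are illustrative, not the PR's:

```python
import asyncio

async def produce(callback):
    # Stand-in for reasoner.research(): reports progress via the callback.
    await callback("working on step 1...")
    await callback("working on step 2...")
    await callback("<END>")  # sentinel: tells the consumer to stop

async def stream():
    queue: asyncio.Queue = asyncio.Queue()

    async def callback(msg: str):
        await queue.put(msg)

    task = asyncio.create_task(produce(callback))
    while True:
        msg = await queue.get()
        if msg == "<END>":
            break
        yield msg
    await task  # re-raise any exception from produce()

async def main():
    async for msg in stream():
        print(msg)

asyncio.run(main())
```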

View File

@ -177,7 +177,6 @@ url = "https://pypi.tuna.tsinghua.edu.cn/simple"
[tool.setuptools]
packages = [
'agent',
'agentic_reasoning',
'api',
'deepdoc',
'graphrag',

View File

@ -0,0 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .tree_structured_query_decomposition_retrieval import TreeStructuredQueryDecompositionRetrieval as DeepResearcher
__all__ = ['DeepResearcher']
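The alias keeps the public name stable: code that previously imported `DeepResearcher` from the removed `agentic_reasoning` package only needs to switch the package path, as `dialog_service.py` does in this PR. A small sketch of the before/after import:

```python
# Before this PR (package removed):
# from agentic_reasoning import DeepResearcher

# After this PR, the same name resolves to the tree-structured policy:
from rag.advanced_rag import DeepResearcher

print(DeepResearcher.__name__)  # "TreeStructuredQueryDecompositionRetrieval"
```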

View File

@ -0,0 +1,126 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import logging
from functools import partial
from api.db.services.llm_service import LLMBundle
from rag.prompts import kb_prompt
from rag.prompts.generator import sufficiency_check, multi_queries_gen
from rag.utils.tavily_conn import Tavily
from timeit import default_timer as timer
class TreeStructuredQueryDecompositionRetrieval:
def __init__(self,
chat_mdl: LLMBundle,
prompt_config: dict,
kb_retrieve: partial = None,
kg_retrieve: partial = None
):
self.chat_mdl = chat_mdl
self.prompt_config = prompt_config
self._kb_retrieve = kb_retrieve
self._kg_retrieve = kg_retrieve
self._lock = asyncio.Lock()
def _retrieve_information(self, search_query):
"""Retrieve information from different sources"""
# 1. Knowledge base retrieval
kbinfos = {"chunks": [], "doc_aggs": []}
try:
kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []}
except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}")
# 2. Web retrieval (if Tavily API is configured)
try:
if self.prompt_config.get("tavily_api_key"):
tav = Tavily(self.prompt_config["tavily_api_key"])
tav_res = tav.retrieve_chunks(search_query)
kbinfos["chunks"].extend(tav_res["chunks"])
kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
except Exception as e:
logging.error(f"Web retrieval error: {e}")
# 3. Knowledge graph retrieval (if configured)
try:
if self.prompt_config.get("use_kg") and self._kg_retrieve:
ck = self._kg_retrieve(question=search_query)
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
except Exception as e:
logging.error(f"Knowledge graph retrieval error: {e}")
return kbinfos
async def _async_update_chunk_info(self, chunk_info, kbinfos):
"""Update chunk information for citations"""
async with self._lock:
if not chunk_info["chunks"]:
# If this is the first retrieval, use the retrieval results directly
for k in chunk_info.keys():
chunk_info[k] = kbinfos[k]
else:
# Merge newly retrieved information, avoiding duplicates
cids = [c["chunk_id"] for c in chunk_info["chunks"]]
for c in kbinfos["chunks"]:
if c["chunk_id"] not in cids:
chunk_info["chunks"].append(c)
dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
for d in kbinfos["doc_aggs"]:
if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d)
async def research(self, chunk_info, question, query, depth=3, callback=None):
if callback:
await callback("<START_DEEP_RESEARCH>")
await self._research(chunk_info, question, query, depth, callback)
if callback:
await callback("<END_DEEP_RESEARCH>")
async def _research(self, chunk_info, question, query, depth=3, callback=None):
if depth == 0:
#if callback:
# await callback("Reach the max search depth.")
return ""
if callback:
await callback(f"Searching by `{query}`...")
st = timer()
ret = self._retrieve_information(query)
if callback:
await callback("Retrieval %d results by %.1fms"%(len(ret["chunks"]), (timer()-st)*1000))
await self._async_update_chunk_info(chunk_info, ret)
ret = kb_prompt(ret, int(self.chat_mdl.max_length * 0.5))
if callback:
await callback("Checking the sufficiency for retrieved information.")
suff = await sufficiency_check(self.chat_mdl, question, ret)
if suff["is_sufficient"]:
if callback:
await callback("Yes, it's sufficient.")
return ret
#if callback:
# await callback("The retrieved information is not sufficient. Planing next steps...")
succ_question_info = await multi_queries_gen(self.chat_mdl, question, query, suff["missing_information"], ret)
if callback:
await callback("Next step is to search for the following questions:\n" + "\n - ".join(step["question"] for step in succ_question_info["questions"]))
steps = []
for step in succ_question_info["questions"]:
steps.append(asyncio.create_task(self._research(chunk_info, step["question"], step["query"], depth-1, callback)))
results = await asyncio.gather(*steps, return_exceptions=True)
return "\n".join([str(r) for r in results])

View File

@ -382,6 +382,7 @@ class Dealer:
# Ensure RERANK_LIMIT is multiple of page_size
RERANK_LIMIT = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
RERANK_LIMIT = max(30, RERANK_LIMIT)
req = {
"kb_ids": kb_ids,
"doc_ids": doc_ids,

View File

@ -38,7 +38,7 @@ def get_value(d, k1, k2):
def chunks_format(reference):
if not reference or (reference is not dict):
if not reference or not isinstance(reference, dict):
return []
return [
{
@ -485,20 +485,26 @@ async def gen_meta_filter(chat_mdl, meta_data: dict, query: str) -> dict:
return {"conditions": []}
async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf=None):
async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf={}, max_retry=2):
from graphrag.utils import get_llm_cache, set_llm_cache
cached = get_llm_cache(chat_mdl.llm_name, system_prompt, user_prompt, gen_conf)
if cached:
return json_repair.loads(cached)
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
try:
res = json_repair.loads(ans)
set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
return res
except Exception:
logging.exception(f"Loading json failure: {ans}")
err = ""
ans = ""
for _ in range(max_retry):
if ans and err:
msg[-1]["content"] += f"\nGenerated JSON is as following:\n{ans}\nBut exception while loading:\n{err}\nPlease reconsider and correct it."
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
try:
res = json_repair.loads(ans)
set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
return res
except Exception as e:
logging.exception(f"Loading json failure: {ans}")
err += str(e)
TOC_DETECTION = load_prompt("toc_detection")
@ -847,8 +853,6 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):
TOC_RELEVANCE_SYSTEM = load_prompt("toc_relevance_system")
TOC_RELEVANCE_USER = load_prompt("toc_relevance_user")
async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn: int = 6):
import numpy as np
try:
@ -876,8 +880,6 @@ async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn:
META_DATA = load_prompt("meta_data")
async def gen_metadata(chat_mdl, schema: dict, content: str):
template = PROMPT_JINJA_ENV.from_string(META_DATA)
for k, desc in schema["properties"].items():
@ -890,3 +892,34 @@ async def gen_metadata(chat_mdl, schema: dict, content: str):
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:])
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
SUFFICIENCY_CHECK = load_prompt("sufficiency_check")
async def sufficiency_check(chat_mdl, question: str, ret_content: str):
try:
return await gen_json(
PROMPT_JINJA_ENV.from_string(SUFFICIENCY_CHECK).render(question=question, retrieved_docs=ret_content),
"Output:\n",
chat_mdl
)
except Exception as e:
logging.exception(e)
return {}
MULTI_QUERIES_GEN = load_prompt("multi_queries_gen")
async def multi_queries_gen(chat_mdl, question: str, query: str, missing_infos: list[str], ret_content: str):
try:
return await gen_json(
PROMPT_JINJA_ENV.from_string(MULTI_QUERIES_GEN).render(
original_question=question,
original_query=query,
missing_info="\n - ".join(missing_infos),
retrieved_docs=ret_content
),
"Output:\n",
chat_mdl
)
except Exception as e:
logging.exception(e)
return {}
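Together these two helpers define the JSON contract that drives the tree recursion in `_research`: `sufficiency_check` returns the verdict described by the `sufficiency_check` template below, and when it is insufficient its `missing_information` feeds `multi_queries_gen`, whose `questions` array becomes the child nodes. A sketch of the expected shapes, with illustrative values borrowed from the Spielberg example in the removed REASON_PROMPT:

```python
# Shape produced by sufficiency_check (see the sufficiency_check prompt template):
suff = {
    "is_sufficient": False,
    "reasoning": "The docs identify the director of Jaws but not his nationality.",
    "missing_information": ["Steven Spielberg's nationality"],
}

# Shape produced by multi_queries_gen (see the multi_queries_gen prompt template);
# each entry becomes one child _research task at depth - 1:
succ_question_info = {
    "reasoning": "Target the missing nationality fact with two phrasings.",
    "questions": [
        {"question": "Where is Steven Spielberg from?", "query": "Steven Spielberg nationality"},
        {"question": "In which country was Steven Spielberg born?", "query": "Spielberg birthplace country"},
    ],
}
```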

View File

@ -0,0 +1,41 @@
You are a query optimization expert.
The user's original query failed to retrieve sufficient information;
please generate several complementary, improved questions and corresponding queries.
Original query:
{{ original_query }}
Original question:
{{ original_question }}
Currently retrieved content:
{{ retrieved_docs }}
Missing information:
{{ missing_info }}
Please generate 2-3 complementary queries to help find the missing information. These queries should:
1. Focus on different pieces of the missing information.
2. Use varied phrasings.
3. Avoid being identical to the original query.
4. Remain concise and clear.
Output format (JSON):
```json
{
"reasoning": "Explanation of query generation strategy",
"questions": [
{"question": "Improved question 1", "query": "Improved query 1"},
{"question": "Improved question 2", "query": "Improved query 2"},
{"question": "Improved question 3", "query": "Improved query 3"}
]
}
```
Requirements:
1. The questions array contains 1-3 questions and their corresponding queries.
2. Each question is between 5 and 200 characters long.
3. Each query is 1-5 keywords.
4. Each query MUST be in the same language as the retrieved content.
5. DO NOT generate a question or query that is similar to the original query.
6. The reasoning explains the generation strategy.

View File

@ -0,0 +1,24 @@
You are an information retrieval evaluation expert. Please assess whether the currently retrieved content is sufficient to answer the user's question.
User question:
{{ question }}
Retrieved content:
{{ retrieved_docs }}
Please determine whether this content is sufficient to answer the user's question.
Output format (JSON):
```json
{
"is_sufficient": true/false,
"reasoning": "Your reasoning for the judgment",
"missing_information": ["Missing information 1", "Missing information 2"]
}
```
Requirements:
1. If the retrieved content contains the key information needed to answer the query, judge it as sufficient (true).
2. If key information is missing, judge it as insufficient (false) and list the missing information.
3. The `reasoning` should be concise and clear.
4. The `missing_information` should only be filled in when insufficient; otherwise it should be an empty array.

uv.lock (generated): 1783 lines changed; file diff suppressed because it is too large.