Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-01-31 23:55:06 +08:00)
Feat: support tree structured deep-research policy. (#12559)
### What problem does this PR solve?

#12558

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
@@ -187,7 +187,6 @@ COPY deepdoc deepdoc
 COPY rag rag
 COPY agent agent
 COPY graphrag graphrag
-COPY agentic_reasoning agentic_reasoning
 COPY pyproject.toml uv.lock ./
 COPY mcp mcp
 COPY plugin plugin
@@ -1 +0,0 @@
-from .deep_research import DeepResearcher as DeepResearcher
@@ -1,311 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from functools import partial

from agentic_reasoning.prompts import BEGIN_SEARCH_QUERY, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT, MAX_SEARCH_LIMIT, \
    END_SEARCH_QUERY, REASON_PROMPT, RELEVANT_EXTRACTION_PROMPT
from api.db.services.llm_service import LLMBundle
from rag.nlp import extract_between
from rag.prompts import kb_prompt
from rag.utils.tavily_conn import Tavily

class DeepResearcher:
    def __init__(self,
                 chat_mdl: LLMBundle,
                 prompt_config: dict,
                 kb_retrieve: partial = None,
                 kg_retrieve: partial = None
                 ):
        self.chat_mdl = chat_mdl
        self.prompt_config = prompt_config
        self._kb_retrieve = kb_retrieve
        self._kg_retrieve = kg_retrieve
    @staticmethod
    def _remove_tags(text: str, start_tag: str, end_tag: str) -> str:
        """Remove tags but keep the content between them."""
        if not text:
            return text
        text = re.sub(re.escape(start_tag), "", text)
        return re.sub(re.escape(end_tag), "", text)

    @staticmethod
    def _remove_query_tags(text: str) -> str:
        """Remove query tags."""
        return DeepResearcher._remove_tags(text, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)

    @staticmethod
    def _remove_result_tags(text: str) -> str:
        """Remove result tags."""
        return DeepResearcher._remove_tags(text, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT)
    async def _generate_reasoning(self, msg_history):
        """Generate reasoning steps (delta output)."""
        raw_answer = ""
        cleaned_answer = ""
        if msg_history[-1]["role"] != "user":
            msg_history.append({"role": "user", "content": "Continue reasoning with the new information.\n"})
        else:
            msg_history[-1]["content"] += "\n\nContinue reasoning with the new information.\n"

        async for delta in self.chat_mdl.async_chat_streamly_delta(REASON_PROMPT, msg_history, {"temperature": 0.7}):
            if not delta:
                continue
            raw_answer += delta
            # Strip any <think> preamble, then emit only the newly appended text
            cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
            if not cleaned_full:
                continue
            if cleaned_full.startswith(cleaned_answer):
                delta_clean = cleaned_full[len(cleaned_answer):]
            else:
                delta_clean = cleaned_full
            if not delta_clean:
                continue
            cleaned_answer = cleaned_full
            yield delta_clean
    def _extract_search_queries(self, query_think, question, step_index):
        """Extract search queries from the reasoning text."""
        queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
        if not queries and step_index == 0:
            # If this is the first step and no queries are found, use the original question as the query
            queries = [question]
        return queries

    def _truncate_previous_reasoning(self, all_reasoning_steps):
        """Truncate previous reasoning steps to maintain a reasonable length."""
        truncated_prev_reasoning = ""
        for i, step in enumerate(all_reasoning_steps):
            truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n"

        prev_steps = truncated_prev_reasoning.split('\n\n')
        if len(prev_steps) <= 5:
            truncated_prev_reasoning = '\n\n'.join(prev_steps)
        else:
            # Keep the first step, the last four steps, and any step holding a search
            # query or result; collapse everything else into a single "..." marker
            truncated_prev_reasoning = ''
            for i, step in enumerate(prev_steps):
                if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step:
                    truncated_prev_reasoning += step + '\n\n'
                else:
                    if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n':
                        truncated_prev_reasoning += '...\n\n'

        return truncated_prev_reasoning.strip('\n')
    def _retrieve_information(self, search_query):
        """Retrieve information from different sources."""
        # 1. Knowledge base retrieval (dict init so a failed retrieval still
        #    leaves a usable {"chunks": [], "doc_aggs": []} structure below)
        kbinfos = {"chunks": [], "doc_aggs": []}
        try:
            if self._kb_retrieve:
                kbinfos = self._kb_retrieve(question=search_query)
        except Exception as e:
            logging.error(f"Knowledge base retrieval error: {e}")

        # 2. Web retrieval (if the Tavily API is configured)
        try:
            if self.prompt_config.get("tavily_api_key"):
                tav = Tavily(self.prompt_config["tavily_api_key"])
                tav_res = tav.retrieve_chunks(search_query)
                kbinfos["chunks"].extend(tav_res["chunks"])
                kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
        except Exception as e:
            logging.error(f"Web retrieval error: {e}")

        # 3. Knowledge graph retrieval (if configured)
        try:
            if self.prompt_config.get("use_kg") and self._kg_retrieve:
                ck = self._kg_retrieve(question=search_query)
                if ck["content_with_weight"]:
                    kbinfos["chunks"].insert(0, ck)
        except Exception as e:
            logging.error(f"Knowledge graph retrieval error: {e}")

        return kbinfos
    def _update_chunk_info(self, chunk_info, kbinfos):
        """Update chunk information for citations."""
        if not chunk_info["chunks"]:
            # If this is the first retrieval, use the retrieval results directly
            for k in chunk_info.keys():
                chunk_info[k] = kbinfos[k]
        else:
            # Merge newly retrieved information, avoiding duplicates
            cids = [c["chunk_id"] for c in chunk_info["chunks"]]
            for c in kbinfos["chunks"]:
                if c["chunk_id"] not in cids:
                    chunk_info["chunks"].append(c)

            dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
            for d in kbinfos["doc_aggs"]:
                if d["doc_id"] not in dids:
                    chunk_info["doc_aggs"].append(d)
    async def _extract_relevant_info(self, truncated_prev_reasoning, search_query, kbinfos):
        """Extract and summarize relevant information (delta output)."""
        raw_answer = ""
        cleaned_answer = ""
        async for delta in self.chat_mdl.async_chat_streamly_delta(
                RELEVANT_EXTRACTION_PROMPT.format(
                    prev_reasoning=truncated_prev_reasoning,
                    search_query=search_query,
                    document="\n".join(kb_prompt(kbinfos, 4096))
                ),
                [{"role": "user",
                  "content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}],
                {"temperature": 0.7}):
            if not delta:
                continue
            raw_answer += delta
            cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
            if not cleaned_full:
                continue
            if cleaned_full.startswith(cleaned_answer):
                delta_clean = cleaned_full[len(cleaned_answer):]
            else:
                delta_clean = cleaned_full
            if not delta_clean:
                continue
            cleaned_answer = cleaned_full
            yield delta_clean
    async def thinking(self, chunk_info: dict, question: str):
        executed_search_queries = []
        msg_history = [{"role": "user", "content": f'Question:"{question}"\n'}]
        all_reasoning_steps = []
        think = "<think>"
        last_idx = 0
        endswith_think = False
        last_full = ""

        def emit_delta(full_text: str):
            nonlocal last_idx, endswith_think, last_full
            if full_text == last_full:
                return None
            last_full = full_text
            delta_ans = full_text[last_idx:]

            if delta_ans.find("<think>") == 0:
                last_idx += len("<think>")
                delta = "<think>"
            elif delta_ans.find("<think>") > 0:
                delta = full_text[last_idx:last_idx + delta_ans.find("<think>")]
                last_idx += delta_ans.find("<think>")
            elif delta_ans.endswith("</think>"):
                endswith_think = True
                delta = re.sub(r"(<think>|</think>)", "", delta_ans)
            elif endswith_think:
                endswith_think = False
                delta = "</think>"
            else:
                last_idx = len(full_text)
                if full_text.endswith("</think>"):
                    last_idx -= len("</think>")
                delta = re.sub(r"(<think>|</think>)", "", delta_ans)

            if not delta:
                return None
            if delta == "<think>":
                return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
            if delta == "</think>":
                return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
            return {"answer": delta, "reference": {}, "audio_binary": None, "final": False}

        def flush_think_close():
            nonlocal endswith_think
            if endswith_think:
                endswith_think = False
                return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
            return None

        for step_index in range(MAX_SEARCH_LIMIT + 1):
            # Check if the maximum search limit has been reached
            if step_index == MAX_SEARCH_LIMIT - 1:
                summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n"
                payload = emit_delta(think + summary_think)
                if payload:
                    yield payload
                all_reasoning_steps.append(summary_think)
                msg_history.append({"role": "assistant", "content": summary_think})
                break

            # Step 1: Generate reasoning
            query_think = ""
            async for delta in self._generate_reasoning(msg_history):
                query_think += delta
                payload = emit_delta(think + self._remove_query_tags(query_think))
                if payload:
                    yield payload

            think += self._remove_query_tags(query_think)
            all_reasoning_steps.append(query_think)

            # Step 2: Extract search queries
            queries = self._extract_search_queries(query_think, question, step_index)
            if not queries and step_index > 0:
                # If this is not the first step and there are no queries, end the search process
                break

            # Process each search query
            for search_query in queries:
                msg_history.append({"role": "assistant", "content": search_query})
                think += f"\n\n> {step_index + 1}. {search_query}\n\n"
                payload = emit_delta(think)
                if payload:
                    yield payload

                # Check if the query has already been executed
                if search_query in executed_search_queries:
                    summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n"
                    payload = emit_delta(think + summary_think)
                    if payload:
                        yield payload
                    all_reasoning_steps.append(summary_think)
                    msg_history.append({"role": "user", "content": summary_think})
                    think += summary_think
                    continue

                executed_search_queries.append(search_query)

                # Step 3: Truncate previous reasoning steps
                truncated_prev_reasoning = self._truncate_previous_reasoning(all_reasoning_steps)

                # Step 4: Retrieve information
                kbinfos = self._retrieve_information(search_query)

                # Step 5: Update chunk information
                self._update_chunk_info(chunk_info, kbinfos)

                # Step 6: Extract relevant information
                think += "\n\n"
                summary_think = ""
                async for delta in self._extract_relevant_info(truncated_prev_reasoning, search_query, kbinfos):
                    summary_think += delta
                    payload = emit_delta(think + self._remove_result_tags(summary_think))
                    if payload:
                        yield payload

                all_reasoning_steps.append(summary_think)
                msg_history.append(
                    {"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"})
                think += self._remove_result_tags(summary_think)

        final_payload = emit_delta(think + "</think>")
        if final_payload:
            yield final_payload
        close_payload = flush_think_close()
        if close_payload:
            yield close_payload
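The deleted `DeepResearcher` was consumed as an async generator: callers passed a shared `chunk_info` dict that accumulated citation chunks while iterating `thinking` and forwarding each payload to the client. A minimal consumption sketch, assuming a configured `chat_mdl` (an `LLMBundle`); the driver function itself is illustrative, not part of the diff:

```python
async def run_deep_research(chat_mdl, question: str):
    # chunk_info accumulates de-duplicated chunks/doc_aggs across all searches
    chunk_info = {"chunks": [], "doc_aggs": []}
    reasoner = DeepResearcher(chat_mdl, prompt_config={})
    async for payload in reasoner.thinking(chunk_info, question):
        if payload.get("start_to_think"):
            print("[think]")
        elif payload.get("end_to_think"):
            print("[/think]")
        else:
            print(payload["answer"], end="")
    return chunk_info
```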
@@ -1,147 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
END_SEARCH_QUERY = "<|end_search_query|>"
BEGIN_SEARCH_RESULT = "<|begin_search_result|>"
END_SEARCH_RESULT = "<|end_search_result|>"
MAX_SEARCH_LIMIT = 6

REASON_PROMPT = f"""You are an advanced reasoning agent. Your goal is to answer the user's question by breaking it down into a series of verifiable steps.

You have access to a powerful search tool to find information.

**Your Task:**
1. Analyze the user's question.
2. If you need information, issue a search query to find a specific fact.
3. Review the search results.
4. Repeat the search process until you have all the facts needed to answer the question.
5. Once you have gathered sufficient information, synthesize the facts and provide the final answer directly.

**Tool Usage:**
- To search, you MUST write your query between the special tokens: {BEGIN_SEARCH_QUERY}your query{END_SEARCH_QUERY}.
- The system will provide results between {BEGIN_SEARCH_RESULT}search results{END_SEARCH_RESULT}.
- You have a maximum of {MAX_SEARCH_LIMIT} search attempts.

---
**Example 1: Multi-hop Question**

**Question:** "Are both the directors of Jaws and Casino Royale from the same country?"

**Your Thought Process & Actions:**
First, I need to identify the director of Jaws.
{BEGIN_SEARCH_QUERY}who is the director of Jaws?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Jaws is a 1975 American thriller film directed by Steven Spielberg.
{END_SEARCH_RESULT}
Okay, the director of Jaws is Steven Spielberg. Now I need to find out his nationality.
{BEGIN_SEARCH_QUERY}where is Steven Spielberg from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Steven Allan Spielberg is an American filmmaker. Born in Cincinnati, Ohio...
{END_SEARCH_RESULT}
So, Steven Spielberg is from the USA. Next, I need to find the director of Casino Royale.
{BEGIN_SEARCH_QUERY}who is the director of Casino Royale 2006?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Casino Royale is a 2006 spy film directed by Martin Campbell.
{END_SEARCH_RESULT}
The director of Casino Royale is Martin Campbell. Now I need his nationality.
{BEGIN_SEARCH_QUERY}where is Martin Campbell from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Martin Campbell (born 24 October 1943) is a New Zealand film and television director.
{END_SEARCH_RESULT}
I have all the information. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand. They are not from the same country.

Final Answer: No, the directors of Jaws and Casino Royale are not from the same country. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand.

---
**Example 2: Simple Fact Retrieval**

**Question:** "When was the founder of craigslist born?"

**Your Thought Process & Actions:**
First, I need to know who founded craigslist.
{BEGIN_SEARCH_QUERY}who founded craigslist?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craigslist was founded in 1995 by Craig Newmark.
{END_SEARCH_RESULT}
The founder is Craig Newmark. Now I need his birth date.
{BEGIN_SEARCH_QUERY}when was Craig Newmark born?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craig Newmark was born on December 6, 1952.
{END_SEARCH_RESULT}
I have found the answer.

Final Answer: The founder of craigslist, Craig Newmark, was born on December 6, 1952.

---
**Important Rules:**
- **One Fact at a Time:** Decompose the problem and issue one search query at a time to find a single, specific piece of information.
- **Be Precise:** Formulate clear and precise search queries. If a search fails, rephrase it.
- **Synthesize at the End:** Do not provide the final answer until you have completed all necessary searches.
- **Language Consistency:** Your search queries should be in the same language as the user's question.

Now, begin your work. Please answer the following question by thinking step-by-step.
"""
RELEVANT_EXTRACTION_PROMPT = """You are a highly efficient information extraction module. Your sole purpose is to extract the single most relevant piece of information from the provided `Searched Web Pages` that directly answers the `Current Search Query`.

**Your Task:**
1. Read the `Current Search Query` to understand what specific information is needed.
2. Scan the `Searched Web Pages` to find the answer to that query.
3. Extract only the essential, factual information that answers the query. Be concise.

**Context (For Your Information Only):**
The `Previous Reasoning Steps` are provided to give you context on the overall goal, but your primary focus MUST be on answering the `Current Search Query`. Do not use information from the previous steps in your output.

**Output Format:**
Your response must follow one of two formats precisely.

1. **If a direct and relevant answer is found:**
   - Start your response immediately with `Final Information`.
   - Provide only the extracted fact(s). Do not add any extra conversational text.

   *Example:*
   `Current Search Query`: Where is Martin Campbell from?
   `Searched Web Pages`: [Long article snippet about Martin Campbell's career, which includes the sentence "Martin Campbell (born 24 October 1943) is a New Zealand film and television director..."]

   *Your Output:*
   Final Information
   Martin Campbell is a New Zealand film and television director.

2. **If no relevant answer that directly addresses the query is found in the web pages:**
   - Start your response immediately with `Final Information`.
   - Write the exact phrase: `No helpful information found.`

---
**BEGIN TASK**

**Inputs:**

- **Previous Reasoning Steps:**
{prev_reasoning}

- **Current Search Query:**
{search_query}

- **Searched Web Pages:**
{document}
"""
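For reference, the query extraction above relies on `rag.nlp.extract_between` to pull out whatever the model wrote between the begin/end tokens. A minimal stand-in with the same contract (an assumption for illustration, not the library's actual implementation):

```python
import re

def extract_between(text: str, start_tag: str, end_tag: str) -> list:
    # Non-greedy match of each start/end tag pair; DOTALL lets queries span lines
    pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag)
    return [m.strip() for m in re.findall(pattern, text, flags=re.DOTALL)]

sample = 'I need the director.\n<|begin_search_query|>who directed Jaws?<|end_search_query|>'
print(extract_between(sample, "<|begin_search_query|>", "<|end_search_query|>"))
# -> ['who directed Jaws?']
```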
@@ -174,6 +174,7 @@ async def update_metadata_setting():
                         message="Database error (Knowledgebase rename)!")
     kb = kb.to_dict()
     kb["parser_config"]["metadata"] = req["metadata"]
+    kb["parser_config"]["enable_metadata"] = req.get("enable_metadata", True)
     KnowledgebaseService.update_by_id(kb["id"], kb)
     return get_json_result(data=kb)
@@ -64,6 +64,7 @@ class ConversationService(CommonService):
         offset += limit
     return res


 def structure_answer(conv, ans, message_id, session_id):
     reference = ans["reference"]
     if not isinstance(reference, dict):
@@ -107,6 +108,7 @@ def structure_answer(conv, ans, message_id, session_id):
     conv.reference[-1] = reference
     return ans


 async def async_completion(tenant_id, chat_id, question, name="New session", session_id=None, stream=True, **kwargs):
     assert name, "`name` can not be empty."
     dia = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import asyncio
 import binascii
 import logging
 import re
@@ -23,7 +24,6 @@ from functools import partial
 from timeit import default_timer as timer
 from langfuse import Langfuse
 from peewee import fn
-from agentic_reasoning import DeepResearcher
 from api.db.services.file_service import FileService
 from common.constants import LLMType, ParserType, StatusEnum
 from api.db.db_models import DB, Dialog
@@ -36,6 +36,7 @@ from common.metadata_utils import apply_meta_data_filter
 from api.db.services.tenant_llm_service import TenantLLMService
 from common.time_utils import current_timestamp, datetime_format
 from graphrag.general.mind_map_extractor import MindMapExtractor
+from rag.advanced_rag import DeepResearcher
 from rag.app.resume import forbidden_select_fields4resume
 from rag.app.tag import label_question
 from rag.nlp.search import index_name
@@ -380,16 +381,35 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
                 doc_ids=attachments,
             ),
         )
+        queue = asyncio.Queue()
+
+        async def callback(msg: str):
+            nonlocal queue
+            await queue.put(msg + "<br/>")
+
+        await callback("<START_DEEP_RESEARCH>")
+        task = asyncio.create_task(reasoner.research(kbinfos, questions[-1], questions[-1], callback=callback))
+        while True:
+            msg = await queue.get()
+            if msg.find("<START_DEEP_RESEARCH>") == 0:
+                yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
+            elif msg.find("<END_DEEP_RESEARCH>") == 0:
+                yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
+                break
+            else:
+                yield {"answer": msg, "reference": {}, "audio_binary": None, "final": False}
+
+        await task
+        '''
         async for think in reasoner.thinking(kbinfos, attachments_ + " ".join(questions)):
             if isinstance(think, str):
                 thought = think
                 knowledges = [t for t in think.split("\n") if t]
             elif stream:
                 yield think
+        '''
     else:
         if embd_mdl:
-            kbinfos = retriever.retrieval(
+            kbinfos = await asyncio.to_thread(retriever.retrieval,
                 " ".join(questions),
                 embd_mdl,
                 tenant_ids,
@@ -420,8 +440,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
             if ck["content_with_weight"]:
                 kbinfos["chunks"].insert(0, ck)

     knowledges = kb_prompt(kbinfos, max_tokens)

 logging.debug("{}->{}".format(" ".join(questions), "\n->".join(knowledges)))

 retrieval_ts = timer()
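The replacement wiring above decouples the research task from the streamed response with an `asyncio.Queue`: the researcher reports progress through a callback (producer) while `async_chat` drains the queue (consumer) and turns the `<START_DEEP_RESEARCH>`/`<END_DEEP_RESEARCH>` sentinels into start/end-of-thinking events. The pattern in isolation, with illustrative names rather than the RAGFlow API:

```python
import asyncio

async def worker(emit):
    # Producer: report progress through the callback, then signal completion
    for step in ("Searching by `q1`...", "Checking sufficiency..."):
        await emit(step)
    await emit("<END_DEEP_RESEARCH>")

async def main():
    queue = asyncio.Queue()

    async def emit(msg: str):
        await queue.put(msg)

    task = asyncio.create_task(worker(emit))
    while True:
        msg = await queue.get()  # consumer streams messages as they arrive
        if msg == "<END_DEEP_RESEARCH>":
            break
        print(msg)
    await task  # surfaces any exception raised inside the worker

asyncio.run(main())
```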
@@ -177,7 +177,6 @@ url = "https://pypi.tuna.tsinghua.edu.cn/simple"
 [tool.setuptools]
 packages = [
     'agent',
-    'agentic_reasoning',
     'api',
     'deepdoc',
     'graphrag',
rag/advanced_rag/__init__.py (new file, 20 lines)
@@ -0,0 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from .tree_structured_query_decomposition_retrieval import TreeStructuredQueryDecompositionRetrieval as DeepResearcher


__all__ = ['DeepResearcher']
rag/advanced_rag/tree_structured_query_decomposition_retrieval.py (new file, 126 lines; the path follows from the import in __init__.py above)
@@ -0,0 +1,126 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import logging
from functools import partial

from api.db.services.llm_service import LLMBundle
from rag.prompts import kb_prompt
from rag.prompts.generator import sufficiency_check, multi_queries_gen
from rag.utils.tavily_conn import Tavily
from timeit import default_timer as timer


class TreeStructuredQueryDecompositionRetrieval:
    def __init__(self,
                 chat_mdl: LLMBundle,
                 prompt_config: dict,
                 kb_retrieve: partial = None,
                 kg_retrieve: partial = None
                 ):
        self.chat_mdl = chat_mdl
        self.prompt_config = prompt_config
        self._kb_retrieve = kb_retrieve
        self._kg_retrieve = kg_retrieve
        self._lock = asyncio.Lock()

    def _retrieve_information(self, search_query):
        """Retrieve information from different sources."""
        # 1. Knowledge base retrieval (dict init so a failed retrieval still
        #    leaves a usable {"chunks": [], "doc_aggs": []} structure below)
        kbinfos = {"chunks": [], "doc_aggs": []}
        try:
            if self._kb_retrieve:
                kbinfos = self._kb_retrieve(question=search_query)
        except Exception as e:
            logging.error(f"Knowledge base retrieval error: {e}")

        # 2. Web retrieval (if the Tavily API is configured)
        try:
            if self.prompt_config.get("tavily_api_key"):
                tav = Tavily(self.prompt_config["tavily_api_key"])
                tav_res = tav.retrieve_chunks(search_query)
                kbinfos["chunks"].extend(tav_res["chunks"])
                kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
        except Exception as e:
            logging.error(f"Web retrieval error: {e}")

        # 3. Knowledge graph retrieval (if configured)
        try:
            if self.prompt_config.get("use_kg") and self._kg_retrieve:
                ck = self._kg_retrieve(question=search_query)
                if ck["content_with_weight"]:
                    kbinfos["chunks"].insert(0, ck)
        except Exception as e:
            logging.error(f"Knowledge graph retrieval error: {e}")

        return kbinfos

    async def _async_update_chunk_info(self, chunk_info, kbinfos):
        """Update chunk information for citations (branches may run concurrently)."""
        async with self._lock:
            if not chunk_info["chunks"]:
                # If this is the first retrieval, use the retrieval results directly
                for k in chunk_info.keys():
                    chunk_info[k] = kbinfos[k]
            else:
                # Merge newly retrieved information, avoiding duplicates
                cids = [c["chunk_id"] for c in chunk_info["chunks"]]
                for c in kbinfos["chunks"]:
                    if c["chunk_id"] not in cids:
                        chunk_info["chunks"].append(c)

                dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
                for d in kbinfos["doc_aggs"]:
                    if d["doc_id"] not in dids:
                        chunk_info["doc_aggs"].append(d)

    async def research(self, chunk_info, question, query, depth=3, callback=None):
        if callback:
            await callback("<START_DEEP_RESEARCH>")
        await self._research(chunk_info, question, query, depth, callback)
        if callback:
            await callback("<END_DEEP_RESEARCH>")

    async def _research(self, chunk_info, question, query, depth=3, callback=None):
        if depth == 0:
            # if callback:
            #     await callback("Reached the max search depth.")
            return ""
        if callback:
            await callback(f"Searching by `{query}`...")
        st = timer()
        ret = self._retrieve_information(query)
        if callback:
            await callback("Retrieved %d results in %.1f ms" % (len(ret["chunks"]), (timer() - st) * 1000))
        await self._async_update_chunk_info(chunk_info, ret)
        ret = kb_prompt(ret, self.chat_mdl.max_length * 0.5)

        if callback:
            await callback("Checking the sufficiency of the retrieved information.")
        suff = await sufficiency_check(self.chat_mdl, question, ret)
        if suff["is_sufficient"]:
            if callback:
                await callback("Yes, it's sufficient.")
            return ret

        # if callback:
        #     await callback("The retrieved information is not sufficient. Planning next steps...")
        succ_question_info = await multi_queries_gen(self.chat_mdl, question, query, suff["missing_information"], ret)
        if callback:
            await callback("Next step is to search for the following questions:\n" + "\n - ".join(step["question"] for step in succ_question_info["questions"]))
        steps = []
        for step in succ_question_info["questions"]:
            # Recurse on each sub-question concurrently; every level decrements depth
            steps.append(asyncio.create_task(self._research(chunk_info, step["question"], step["query"], depth - 1, callback)))
        results = await asyncio.gather(*steps, return_exceptions=True)
        return "\n".join([str(r) for r in results])
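`research` thus performs a depth-limited tree search: a node whose evidence is judged insufficient fans out into 2-3 complementary sub-queries, the branches run concurrently, and every branch merges its evidence into the shared `chunk_info` under the lock. A minimal driver sketch; the `chat_mdl` and `retrieval_fn` wiring is assumed, mirroring how `async_chat` invokes it above:

```python
from functools import partial

async def run_tree_research(chat_mdl, retrieval_fn, question: str):
    chunk_info = {"chunks": [], "doc_aggs": []}
    reasoner = TreeStructuredQueryDecompositionRetrieval(
        chat_mdl,
        prompt_config={},
        kb_retrieve=partial(retrieval_fn),  # must accept question=<str>
    )

    async def progress(msg: str):
        print(msg)

    # depth=3 bounds the tree: the root plus at most two levels of decomposition
    await reasoner.research(chunk_info, question, question, depth=3, callback=progress)
    return chunk_info
```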
@@ -382,6 +382,7 @@ class Dealer:

 # Ensure RERANK_LIMIT is multiple of page_size
 RERANK_LIMIT = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
+RERANK_LIMIT = max(30, RERANK_LIMIT)
 req = {
     "kb_ids": kb_ids,
     "doc_ids": doc_ids,
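The added floor matters for small page sizes: with page_size = 9, ceil(64 / 9) * 9 = 72 and max(30, 72) leaves it at 72, but with page_size = 1 the first expression yields 1 and the new max(30, 1) lifts the rerank window to 30.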
@@ -38,7 +38,7 @@ def get_value(d, k1, k2):


 def chunks_format(reference):
-    if not reference or (reference is not dict):
+    if not reference or not isinstance(reference, dict):
         return []
     return [
         {
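The old guard compared the object against the `dict` type itself, so `reference is not dict` is true for every real dictionary and valid references were rejected; `isinstance` is the correct test:

```python
reference = {"chunks": []}
print(reference is not dict)            # True  - identity check against the type object
print(not isinstance(reference, dict))  # False - the proper type test
```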
@@ -485,20 +485,26 @@ async def gen_meta_filter(chat_mdl, meta_data: dict, query: str) -> dict:
     return {"conditions": []}


-async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf=None):
+async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf={}, max_retry=2):
     from graphrag.utils import get_llm_cache, set_llm_cache
     cached = get_llm_cache(chat_mdl.llm_name, system_prompt, user_prompt, gen_conf)
     if cached:
         return json_repair.loads(cached)
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
-    ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
-    ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
-    try:
-        res = json_repair.loads(ans)
-        set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
-        return res
-    except Exception:
-        logging.exception(f"Loading json failure: {ans}")
+    err = ""
+    ans = ""
+    for _ in range(max_retry):
+        if ans and err:
+            msg[-1]["content"] += f"\nGenerated JSON is as following:\n{ans}\nBut exception while loading:\n{err}\nPlease reconsider and correct it."
+        ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
+        ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
+        try:
+            res = json_repair.loads(ans)
+            set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
+            return res
+        except Exception as e:
+            logging.exception(f"Loading json failure: {ans}")
+            err += str(e)


 TOC_DETECTION = load_prompt("toc_detection")
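The reworked `gen_json` turns one parse attempt into a self-correction loop: when `json_repair` fails, the bad output and the exception text are appended to the user message so the next attempt can fix its own JSON. The same pattern in isolation, with a stub `ask` standing in for the model call:

```python
import json

def gen_json_with_retry(ask, prompt: str, max_retry: int = 2):
    err, ans = "", ""
    for _ in range(max_retry):
        if ans and err:
            # Feed the failed output and the parser's complaint back to the model
            prompt += f"\nGenerated JSON:\n{ans}\nException while loading:\n{err}\nPlease correct it."
        ans = ask(prompt)
        try:
            return json.loads(ans)
        except Exception as e:
            err += str(e)
    return None
```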
@@ -847,8 +853,6 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):

 TOC_RELEVANCE_SYSTEM = load_prompt("toc_relevance_system")
 TOC_RELEVANCE_USER = load_prompt("toc_relevance_user")


 async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn: int = 6):
     import numpy as np
     try:
@@ -876,8 +880,6 @@ async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn:


 META_DATA = load_prompt("meta_data")


 async def gen_metadata(chat_mdl, schema: dict, content: str):
     template = PROMPT_JINJA_ENV.from_string(META_DATA)
     for k, desc in schema["properties"].items():
@@ -890,3 +892,34 @@ async def gen_metadata(chat_mdl, schema: dict, content: str):
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
     ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:])
     return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
+
+
+SUFFICIENCY_CHECK = load_prompt("sufficiency_check")
+
+
+async def sufficiency_check(chat_mdl, question: str, ret_content: str):
+    try:
+        return await gen_json(
+            PROMPT_JINJA_ENV.from_string(SUFFICIENCY_CHECK).render(question=question, retrieved_docs=ret_content),
+            "Output:\n",
+            chat_mdl
+        )
+    except Exception as e:
+        logging.exception(e)
+        return {}
+
+
+MULTI_QUERIES_GEN = load_prompt("multi_queries_gen")
+
+
+async def multi_queries_gen(chat_mdl, question: str, query: str, missing_infos: list[str], ret_content: str):
+    try:
+        return await gen_json(
+            PROMPT_JINJA_ENV.from_string(MULTI_QUERIES_GEN).render(
+                original_question=question,
+                original_query=query,
+                missing_info="\n - ".join(missing_infos),
+                retrieved_docs=ret_content
+            ),
+            "Output:\n",
+            chat_mdl
+        )
+    except Exception as e:
+        logging.exception(e)
+        return {}
rag/prompts/multi_queries_gen.md (new file, 41 lines)
@@ -0,0 +1,41 @@
You are a query optimization expert.
The user's original query failed to retrieve sufficient information;
please generate multiple complementary improved questions and corresponding queries.

Original query:
{{ original_query }}

Original question:
{{ original_question }}

Currently retrieved content:
{{ retrieved_docs }}

Missing information:
{{ missing_info }}

Please generate 2-3 complementary queries to help find the missing information. These queries should:
1. Focus on different missing information points.
2. Use different expressions.
3. Avoid being identical to the original query.
4. Remain concise and clear.

Output format (JSON):
```json
{
  "reasoning": "Explanation of the query generation strategy",
  "questions": [
    {"question": "Improved question 1", "query": "Improved query 1"},
    {"question": "Improved question 2", "query": "Improved query 2"},
    {"question": "Improved question 3", "query": "Improved query 3"}
  ]
}
```

Requirements:
1. The questions array contains 1-3 questions and corresponding queries.
2. Each question is between 5 and 200 characters long.
3. Each query is between 1 and 5 keywords long.
4. Each query MUST be in the same language as the retrieved content.
5. DO NOT generate a question or query that is similar to the original query.
6. The reasoning explains the generation strategy.
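For illustration only, a plausible model response to this prompt (entirely hypothetical content) would be:

```json
{
  "reasoning": "The retrieved content covers the feature itself but not its configuration; the new queries target the missing details from different angles.",
  "questions": [
    {"question": "What is the default value of this setting?", "query": "default setting value"},
    {"question": "Where is this setting configured?", "query": "setting configuration file"}
  ]
}
```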
rag/prompts/sufficiency_check.md (new file, 24 lines)
@@ -0,0 +1,24 @@
You are an information retrieval evaluation expert. Please assess whether the currently retrieved content is sufficient to answer the user's question.

User question:
{{ question }}

Retrieved content:
{{ retrieved_docs }}

Please determine whether this content is sufficient to answer the user's question.

Output format (JSON):
```json
{
  "is_sufficient": true/false,
  "reasoning": "Your reasoning for the judgment",
  "missing_information": ["Missing information 1", "Missing information 2"]
}
```

Requirements:
1. If the retrieved content contains the key information needed to answer the query, judge it as sufficient (true).
2. If key information is missing, judge it as insufficient (false) and list the missing information.
3. The `reasoning` should be concise and clear.
4. `missing_information` should only be filled in when insufficient; otherwise it is an empty array.
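Again purely for illustration, an insufficient verdict would come back shaped like:

```json
{
  "is_sufficient": false,
  "reasoning": "The retrieved content describes the feature but not its default settings.",
  "missing_information": ["Default value of the setting", "Where it is configured"]
}
```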