Feat: support tree structured deep-research policy. (#12559)

### What problem does this PR solve?

#12558
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
Kevin Hu
2026-01-13 09:41:35 +08:00
committed by GitHub
parent 867ec94258
commit 44bada64c9
15 changed files with 1166 additions and 1381 deletions

View File

@ -187,7 +187,6 @@ COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
COPY mcp mcp
COPY plugin plugin

View File

@ -1 +0,0 @@
from .deep_research import DeepResearcher as DeepResearcher

View File

@ -1,311 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from functools import partial
from agentic_reasoning.prompts import BEGIN_SEARCH_QUERY, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT, MAX_SEARCH_LIMIT, \
END_SEARCH_QUERY, REASON_PROMPT, RELEVANT_EXTRACTION_PROMPT
from api.db.services.llm_service import LLMBundle
from rag.nlp import extract_between
from rag.prompts import kb_prompt
from rag.utils.tavily_conn import Tavily
class DeepResearcher:
def __init__(self,
chat_mdl: LLMBundle,
prompt_config: dict,
kb_retrieve: partial = None,
kg_retrieve: partial = None
):
self.chat_mdl = chat_mdl
self.prompt_config = prompt_config
self._kb_retrieve = kb_retrieve
self._kg_retrieve = kg_retrieve
@staticmethod
def _remove_tags(text: str, start_tag: str, end_tag: str) -> str:
"""Remove tags but keep the content between them."""
if not text:
return text
text = re.sub(re.escape(start_tag), "", text)
return re.sub(re.escape(end_tag), "", text)
@staticmethod
def _remove_query_tags(text: str) -> str:
"""Remove Query Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
@staticmethod
def _remove_result_tags(text: str) -> str:
"""Remove Result Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT)
async def _generate_reasoning(self, msg_history):
"""Generate reasoning steps (delta output)"""
raw_answer = ""
cleaned_answer = ""
if msg_history[-1]["role"] != "user":
msg_history.append({"role": "user", "content": "Continues reasoning with the new information.\n"})
else:
msg_history[-1]["content"] += "\n\nContinues reasoning with the new information.\n"
async for delta in self.chat_mdl.async_chat_streamly_delta(REASON_PROMPT, msg_history, {"temperature": 0.7}):
if not delta:
continue
raw_answer += delta
cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
if not cleaned_full:
continue
if cleaned_full.startswith(cleaned_answer):
delta_clean = cleaned_full[len(cleaned_answer):]
else:
delta_clean = cleaned_full
if not delta_clean:
continue
cleaned_answer = cleaned_full
yield delta_clean
def _extract_search_queries(self, query_think, question, step_index):
"""Extract search queries from thinking"""
queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
if not queries and step_index == 0:
# If this is the first step and no queries are found, use the original question as the query
queries = [question]
return queries
def _truncate_previous_reasoning(self, all_reasoning_steps):
"""Truncate previous reasoning steps to maintain a reasonable length"""
truncated_prev_reasoning = ""
for i, step in enumerate(all_reasoning_steps):
truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n"
prev_steps = truncated_prev_reasoning.split('\n\n')
if len(prev_steps) <= 5:
truncated_prev_reasoning = '\n\n'.join(prev_steps)
else:
truncated_prev_reasoning = ''
for i, step in enumerate(prev_steps):
if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step:
truncated_prev_reasoning += step + '\n\n'
else:
if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n':
truncated_prev_reasoning += '...\n\n'
return truncated_prev_reasoning.strip('\n')
def _retrieve_information(self, search_query):
"""Retrieve information from different sources"""
# 1. Knowledge base retrieval
kbinfos = {"chunks": [], "doc_aggs": []}
try:
kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []}
except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}")
# 2. Web retrieval (if Tavily API is configured)
try:
if self.prompt_config.get("tavily_api_key"):
tav = Tavily(self.prompt_config["tavily_api_key"])
tav_res = tav.retrieve_chunks(search_query)
kbinfos["chunks"].extend(tav_res["chunks"])
kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
except Exception as e:
logging.error(f"Web retrieval error: {e}")
# 3. Knowledge graph retrieval (if configured)
try:
if self.prompt_config.get("use_kg") and self._kg_retrieve:
ck = self._kg_retrieve(question=search_query)
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
except Exception as e:
logging.error(f"Knowledge graph retrieval error: {e}")
return kbinfos
def _update_chunk_info(self, chunk_info, kbinfos):
"""Update chunk information for citations"""
if not chunk_info["chunks"]:
# If this is the first retrieval, use the retrieval results directly
for k in chunk_info.keys():
chunk_info[k] = kbinfos[k]
else:
# Merge newly retrieved information, avoiding duplicates
cids = [c["chunk_id"] for c in chunk_info["chunks"]]
for c in kbinfos["chunks"]:
if c["chunk_id"] not in cids:
chunk_info["chunks"].append(c)
dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
for d in kbinfos["doc_aggs"]:
if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d)
async def _extract_relevant_info(self, truncated_prev_reasoning, search_query, kbinfos):
"""Extract and summarize relevant information (delta output)"""
raw_answer = ""
cleaned_answer = ""
async for delta in self.chat_mdl.async_chat_streamly_delta(
RELEVANT_EXTRACTION_PROMPT.format(
prev_reasoning=truncated_prev_reasoning,
search_query=search_query,
document="\n".join(kb_prompt(kbinfos, 4096))
),
[{"role": "user",
"content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}],
{"temperature": 0.7}):
if not delta:
continue
raw_answer += delta
cleaned_full = re.sub(r"^.*</think>", "", raw_answer, flags=re.DOTALL)
if not cleaned_full:
continue
if cleaned_full.startswith(cleaned_answer):
delta_clean = cleaned_full[len(cleaned_answer):]
else:
delta_clean = cleaned_full
if not delta_clean:
continue
cleaned_answer = cleaned_full
yield delta_clean
async def thinking(self, chunk_info: dict, question: str):
executed_search_queries = []
msg_history = [{"role": "user", "content": f'Question:\"{question}\"\n'}]
all_reasoning_steps = []
think = "<think>"
last_idx = 0
endswith_think = False
last_full = ""
def emit_delta(full_text: str):
nonlocal last_idx, endswith_think, last_full
if full_text == last_full:
return None
last_full = full_text
delta_ans = full_text[last_idx:]
if delta_ans.find("<think>") == 0:
last_idx += len("<think>")
delta = "<think>"
elif delta_ans.find("<think>") > 0:
delta = full_text[last_idx:last_idx + delta_ans.find("<think>")]
last_idx += delta_ans.find("<think>")
elif delta_ans.endswith("</think>"):
endswith_think = True
delta = re.sub(r"(<think>|</think>)", "", delta_ans)
elif endswith_think:
endswith_think = False
delta = "</think>"
else:
last_idx = len(full_text)
if full_text.endswith("</think>"):
last_idx -= len("</think>")
delta = re.sub(r"(<think>|</think>)", "", delta_ans)
if not delta:
return None
if delta == "<think>":
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
if delta == "</think>":
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
return {"answer": delta, "reference": {}, "audio_binary": None, "final": False}
def flush_think_close():
nonlocal endswith_think
if endswith_think:
endswith_think = False
return {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
return None
for step_index in range(MAX_SEARCH_LIMIT + 1):
# Check if the maximum search limit has been reached
if step_index == MAX_SEARCH_LIMIT - 1:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n"
payload = emit_delta(think + summary_think)
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "assistant", "content": summary_think})
break
# Step 1: Generate reasoning
query_think = ""
async for delta in self._generate_reasoning(msg_history):
query_think += delta
payload = emit_delta(think + self._remove_query_tags(query_think))
if payload:
yield payload
think += self._remove_query_tags(query_think)
all_reasoning_steps.append(query_think)
# Step 2: Extract search queries
queries = self._extract_search_queries(query_think, question, step_index)
if not queries and step_index > 0:
# If not the first step and no queries, end the search process
break
# Process each search query
for search_query in queries:
msg_history.append({"role": "assistant", "content": search_query})
think += f"\n\n> {step_index + 1}. {search_query}\n\n"
payload = emit_delta(think)
if payload:
yield payload
# Check if the query has already been executed
if search_query in executed_search_queries:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n"
payload = emit_delta(think + summary_think)
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "user", "content": summary_think})
think += summary_think
continue
executed_search_queries.append(search_query)
# Step 3: Truncate previous reasoning steps
truncated_prev_reasoning = self._truncate_previous_reasoning(all_reasoning_steps)
# Step 4: Retrieve information
kbinfos = self._retrieve_information(search_query)
# Step 5: Update chunk information
self._update_chunk_info(chunk_info, kbinfos)
# Step 6: Extract relevant information
think += "\n\n"
summary_think = ""
async for delta in self._extract_relevant_info(truncated_prev_reasoning, search_query, kbinfos):
summary_think += delta
payload = emit_delta(think + self._remove_result_tags(summary_think))
if payload:
yield payload
all_reasoning_steps.append(summary_think)
msg_history.append(
{"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"})
think += self._remove_result_tags(summary_think)
final_payload = emit_delta(think + "</think>")
if final_payload:
yield final_payload
close_payload = flush_think_close()
if close_payload:
yield close_payload

View File

@ -1,147 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
END_SEARCH_QUERY = "<|end_search_query|>"
BEGIN_SEARCH_RESULT = "<|begin_search_result|>"
END_SEARCH_RESULT = "<|end_search_result|>"
MAX_SEARCH_LIMIT = 6
REASON_PROMPT = f"""You are an advanced reasoning agent. Your goal is to answer the user's question by breaking it down into a series of verifiable steps.
You have access to a powerful search tool to find information.
**Your Task:**
1. Analyze the user's question.
2. If you need information, issue a search query to find a specific fact.
3. Review the search results.
4. Repeat the search process until you have all the facts needed to answer the question.
5. Once you have gathered sufficient information, synthesize the facts and provide the final answer directly.
**Tool Usage:**
- To search, you MUST write your query between the special tokens: {BEGIN_SEARCH_QUERY}your query{END_SEARCH_QUERY}.
- The system will provide results between {BEGIN_SEARCH_RESULT}search results{END_SEARCH_RESULT}.
- You have a maximum of {MAX_SEARCH_LIMIT} search attempts.
---
**Example 1: Multi-hop Question**
**Question:** "Are both the directors of Jaws and Casino Royale from the same country?"
**Your Thought Process & Actions:**
First, I need to identify the director of Jaws.
{BEGIN_SEARCH_QUERY}who is the director of Jaws?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Jaws is a 1975 American thriller film directed by Steven Spielberg.
{END_SEARCH_RESULT}
Okay, the director of Jaws is Steven Spielberg. Now I need to find out his nationality.
{BEGIN_SEARCH_QUERY}where is Steven Spielberg from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Steven Allan Spielberg is an American filmmaker. Born in Cincinnati, Ohio...
{END_SEARCH_RESULT}
So, Steven Spielberg is from the USA. Next, I need to find the director of Casino Royale.
{BEGIN_SEARCH_QUERY}who is the director of Casino Royale 2006?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Casino Royale is a 2006 spy film directed by Martin Campbell.
{END_SEARCH_RESULT}
The director of Casino Royale is Martin Campbell. Now I need his nationality.
{BEGIN_SEARCH_QUERY}where is Martin Campbell from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Martin Campbell (born 24 October 1943) is a New Zealand film and television director.
{END_SEARCH_RESULT}
I have all the information. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand. They are not from the same country.
Final Answer: No, the directors of Jaws and Casino Royale are not from the same country. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand.
---
**Example 2: Simple Fact Retrieval**
**Question:** "When was the founder of craigslist born?"
**Your Thought Process & Actions:**
First, I need to know who founded craigslist.
{BEGIN_SEARCH_QUERY}who founded craigslist?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craigslist was founded in 1995 by Craig Newmark.
{END_SEARCH_RESULT}
The founder is Craig Newmark. Now I need his birth date.
{BEGIN_SEARCH_QUERY}when was Craig Newmark born?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craig Newmark was born on December 6, 1952.
{END_SEARCH_RESULT}
I have found the answer.
Final Answer: The founder of craigslist, Craig Newmark, was born on December 6, 1952.
---
**Important Rules:**
- **One Fact at a Time:** Decompose the problem and issue one search query at a time to find a single, specific piece of information.
- **Be Precise:** Formulate clear and precise search queries. If a search fails, rephrase it.
- **Synthesize at the End:** Do not provide the final answer until you have completed all necessary searches.
- **Language Consistency:** Your search queries should be in the same language as the user's question.
Now, begin your work. Please answer the following question by thinking step-by-step.
"""
RELEVANT_EXTRACTION_PROMPT = """You are a highly efficient information extraction module. Your sole purpose is to extract the single most relevant piece of information from the provided `Searched Web Pages` that directly answers the `Current Search Query`.
**Your Task:**
1. Read the `Current Search Query` to understand what specific information is needed.
2. Scan the `Searched Web Pages` to find the answer to that query.
3. Extract only the essential, factual information that answers the query. Be concise.
**Context (For Your Information Only):**
The `Previous Reasoning Steps` are provided to give you context on the overall goal, but your primary focus MUST be on answering the `Current Search Query`. Do not use information from the previous steps in your output.
**Output Format:**
Your response must follow one of two formats precisely.
1. **If a direct and relevant answer is found:**
- Start your response immediately with `Final Information`.
- Provide only the extracted fact(s). Do not add any extra conversational text.
*Example:*
`Current Search Query`: Where is Martin Campbell from?
`Searched Web Pages`: [Long article snippet about Martin Campbell's career, which includes the sentence "Martin Campbell (born 24 October 1943) is a New Zealand film and television director..."]
*Your Output:*
Final Information
Martin Campbell is a New Zealand film and television director.
2. **If no relevant answer that directly addresses the query is found in the web pages:**
- Start your response immediately with `Final Information`.
- Write the exact phrase: `No helpful information found.`
---
**BEGIN TASK**
**Inputs:**
- **Previous Reasoning Steps:**
{prev_reasoning}
- **Current Search Query:**
{search_query}
- **Searched Web Pages:**
{document}
"""

View File

@ -174,6 +174,7 @@ async def update_metadata_setting():
message="Database error (Knowledgebase rename)!")
kb = kb.to_dict()
kb["parser_config"]["metadata"] = req["metadata"]
kb["parser_config"]["enable_metadata"] = req.get("enable_metadata", True)
KnowledgebaseService.update_by_id(kb["id"], kb)
return get_json_result(data=kb)

View File

@ -64,6 +64,7 @@ class ConversationService(CommonService):
offset += limit
return res
def structure_answer(conv, ans, message_id, session_id):
reference = ans["reference"]
if not isinstance(reference, dict):
@ -107,6 +108,7 @@ def structure_answer(conv, ans, message_id, session_id):
conv.reference[-1] = reference
return ans
async def async_completion(tenant_id, chat_id, question, name="New session", session_id=None, stream=True, **kwargs):
assert name, "`name` can not be empty."
dia = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import binascii
import logging
import re
@ -23,7 +24,6 @@ from functools import partial
from timeit import default_timer as timer
from langfuse import Langfuse
from peewee import fn
from agentic_reasoning import DeepResearcher
from api.db.services.file_service import FileService
from common.constants import LLMType, ParserType, StatusEnum
from api.db.db_models import DB, Dialog
@ -36,6 +36,7 @@ from common.metadata_utils import apply_meta_data_filter
from api.db.services.tenant_llm_service import TenantLLMService
from common.time_utils import current_timestamp, datetime_format
from graphrag.general.mind_map_extractor import MindMapExtractor
from rag.advanced_rag import DeepResearcher
from rag.app.resume import forbidden_select_fields4resume
from rag.app.tag import label_question
from rag.nlp.search import index_name
@ -380,16 +381,35 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
doc_ids=attachments,
),
)
queue = asyncio.Queue()
async def callback(msg: str):
await queue.put(msg + "<br/>")
await callback("<START_DEEP_RESEARCH>")
task = asyncio.create_task(reasoner.research(kbinfos, questions[-1], questions[-1], callback=callback))
while True:
msg = await queue.get()
if msg.find("<START_DEEP_RESEARCH>") == 0:
yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
elif msg.find("<END_DEEP_RESEARCH>") == 0:
yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
break
else:
yield {"answer": msg, "reference": {}, "audio_binary": None, "final": False}
await task
'''
async for think in reasoner.thinking(kbinfos, attachments_ + " ".join(questions)):
if isinstance(think, str):
thought = think
knowledges = [t for t in think.split("\n") if t]
elif stream:
yield think
'''
else:
if embd_mdl:
kbinfos = retriever.retrieval(
kbinfos = await asyncio.to_thread(retriever.retrieval,
" ".join(questions),
embd_mdl,
tenant_ids,
@ -420,8 +440,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
knowledges = kb_prompt(kbinfos, max_tokens)
logging.debug("{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
retrieval_ts = timer()
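The deep-research branch in this file bridges the callback-driven `research()` coroutine to the streaming chat generator through an `asyncio.Queue`, using `<START_DEEP_RESEARCH>` / `<END_DEEP_RESEARCH>` as sentinel markers. A minimal, self-contained sketch of the same pattern follows; the names (`produce`, `stream`) are illustrative, not the PR's:

```python
import asyncio

async def produce(callback):
    # Stand-in for reasoner.research(): reports progress via the callback.
    await callback("working on step 1...")
    await callback("working on step 2...")
    await callback("<END>")  # sentinel: tells the consumer to stop

async def stream():
    queue: asyncio.Queue = asyncio.Queue()

    async def callback(msg: str):
        await queue.put(msg)

    task = asyncio.create_task(produce(callback))
    while True:
        msg = await queue.get()
        if msg == "<END>":
            break
        yield msg
    await task  # re-raise any exception from produce()

async def main():
    async for msg in stream():
        print(msg)

asyncio.run(main())
```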

View File

@ -177,7 +177,6 @@ url = "https://pypi.tuna.tsinghua.edu.cn/simple"
[tool.setuptools]
packages = [
'agent',
'agentic_reasoning',
'api',
'deepdoc',
'graphrag',

View File

@ -0,0 +1,20 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .tree_structured_query_decomposition_retrieval import TreeStructuredQueryDecompositionRetrieval as DeepResearcher
__all__ = ['DeepResearcher']
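The alias keeps the public name stable: code that previously imported `DeepResearcher` from the removed `agentic_reasoning` package only needs to switch the package path, as `dialog_service.py` does in this PR. A small sketch of the before/after import:

```python
# Before this PR (package removed):
# from agentic_reasoning import DeepResearcher

# After this PR, the same name resolves to the tree-structured policy:
from rag.advanced_rag import DeepResearcher

print(DeepResearcher.__name__)  # "TreeStructuredQueryDecompositionRetrieval"
```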

View File

@ -0,0 +1,126 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import logging
from functools import partial
from api.db.services.llm_service import LLMBundle
from rag.prompts import kb_prompt
from rag.prompts.generator import sufficiency_check, multi_queries_gen
from rag.utils.tavily_conn import Tavily
from timeit import default_timer as timer
class TreeStructuredQueryDecompositionRetrieval:
def __init__(self,
chat_mdl: LLMBundle,
prompt_config: dict,
kb_retrieve: partial = None,
kg_retrieve: partial = None
):
self.chat_mdl = chat_mdl
self.prompt_config = prompt_config
self._kb_retrieve = kb_retrieve
self._kg_retrieve = kg_retrieve
self._lock = asyncio.Lock()
def _retrieve_information(self, search_query):
"""Retrieve information from different sources"""
# 1. Knowledge base retrieval
kbinfos = {"chunks": [], "doc_aggs": []}
try:
kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []}
except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}")
# 2. Web retrieval (if Tavily API is configured)
try:
if self.prompt_config.get("tavily_api_key"):
tav = Tavily(self.prompt_config["tavily_api_key"])
tav_res = tav.retrieve_chunks(search_query)
kbinfos["chunks"].extend(tav_res["chunks"])
kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
except Exception as e:
logging.error(f"Web retrieval error: {e}")
# 3. Knowledge graph retrieval (if configured)
try:
if self.prompt_config.get("use_kg") and self._kg_retrieve:
ck = self._kg_retrieve(question=search_query)
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
except Exception as e:
logging.error(f"Knowledge graph retrieval error: {e}")
return kbinfos
async def _async_update_chunk_info(self, chunk_info, kbinfos):
"""Update chunk information for citations"""
async with self._lock:
if not chunk_info["chunks"]:
# If this is the first retrieval, use the retrieval results directly
for k in chunk_info.keys():
chunk_info[k] = kbinfos[k]
else:
# Merge newly retrieved information, avoiding duplicates
cids = [c["chunk_id"] for c in chunk_info["chunks"]]
for c in kbinfos["chunks"]:
if c["chunk_id"] not in cids:
chunk_info["chunks"].append(c)
dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
for d in kbinfos["doc_aggs"]:
if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d)
async def research(self, chunk_info, question, query, depth=3, callback=None):
if callback:
await callback("<START_DEEP_RESEARCH>")
await self._research(chunk_info, question, query, depth, callback)
if callback:
await callback("<END_DEEP_RESEARCH>")
async def _research(self, chunk_info, question, query, depth=3, callback=None):
if depth == 0:
#if callback:
# await callback("Reach the max search depth.")
return ""
if callback:
await callback(f"Searching by `{query}`...")
st = timer()
ret = self._retrieve_information(query)
if callback:
await callback("Retrieval %d results by %.1fms"%(len(ret["chunks"]), (timer()-st)*1000))
await self._async_update_chunk_info(chunk_info, ret)
ret = kb_prompt(ret, int(self.chat_mdl.max_length * 0.5))
if callback:
await callback("Checking the sufficiency for retrieved information.")
suff = await sufficiency_check(self.chat_mdl, question, ret)
if suff["is_sufficient"]:
if callback:
await callback("Yes, it's sufficient.")
return ret
#if callback:
# await callback("The retrieved information is not sufficient. Planing next steps...")
succ_question_info = await multi_queries_gen(self.chat_mdl, question, query, suff["missing_information"], ret)
if callback:
await callback("Next step is to search for the following questions:\n" + "\n - ".join(step["question"] for step in succ_question_info["questions"]))
steps = []
for step in succ_question_info["questions"]:
steps.append(asyncio.create_task(self._research(chunk_info, step["question"], step["query"], depth-1, callback)))
results = await asyncio.gather(*steps, return_exceptions=True)
return "\n".join([str(r) for r in results])

View File

@ -382,6 +382,7 @@ class Dealer:
# Ensure RERANK_LIMIT is multiple of page_size
RERANK_LIMIT = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
RERANK_LIMIT = max(30, RERANK_LIMIT)
req = {
"kb_ids": kb_ids,
"doc_ids": doc_ids,

View File

@ -38,7 +38,7 @@ def get_value(d, k1, k2):
def chunks_format(reference):
if not reference or (reference is not dict):
if not reference or not isinstance(reference, dict):
return []
return [
{
@ -485,20 +485,26 @@ async def gen_meta_filter(chat_mdl, meta_data: dict, query: str) -> dict:
return {"conditions": []}
async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf=None):
async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf={}, max_retry=2):
from graphrag.utils import get_llm_cache, set_llm_cache
cached = get_llm_cache(chat_mdl.llm_name, system_prompt, user_prompt, gen_conf)
if cached:
return json_repair.loads(cached)
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
try:
res = json_repair.loads(ans)
set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
return res
except Exception:
logging.exception(f"Loading json failure: {ans}")
err = ""
ans = ""
for _ in range(max_retry):
if ans and err:
msg[-1]["content"] += f"\nGenerated JSON is as following:\n{ans}\nBut exception while loading:\n{err}\nPlease reconsider and correct it."
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
try:
res = json_repair.loads(ans)
set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
return res
except Exception as e:
logging.exception(f"Loading json failure: {ans}")
err += str(e)
TOC_DETECTION = load_prompt("toc_detection")
@ -847,8 +853,6 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):
TOC_RELEVANCE_SYSTEM = load_prompt("toc_relevance_system")
TOC_RELEVANCE_USER = load_prompt("toc_relevance_user")
async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn: int = 6):
import numpy as np
try:
@ -876,8 +880,6 @@ async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn:
META_DATA = load_prompt("meta_data")
async def gen_metadata(chat_mdl, schema: dict, content: str):
template = PROMPT_JINJA_ENV.from_string(META_DATA)
for k, desc in schema["properties"].items():
@ -890,3 +892,34 @@ async def gen_metadata(chat_mdl, schema: dict, content: str):
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:])
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
SUFFICIENCY_CHECK = load_prompt("sufficiency_check")
async def sufficiency_check(chat_mdl, question: str, ret_content: str):
try:
return await gen_json(
PROMPT_JINJA_ENV.from_string(SUFFICIENCY_CHECK).render(question=question, retrieved_docs=ret_content),
"Output:\n",
chat_mdl
)
except Exception as e:
logging.exception(e)
return {}
MULTI_QUERIES_GEN = load_prompt("multi_queries_gen")
async def multi_queries_gen(chat_mdl, question: str, query: str, missing_infos: list[str], ret_content: str):
try:
return await gen_json(
PROMPT_JINJA_ENV.from_string(MULTI_QUERIES_GEN).render(
original_question=question,
original_query=query,
missing_info="\n - ".join(missing_infos),
retrieved_docs=ret_content
),
"Output:\n",
chat_mdl
)
except Exception as e:
logging.exception(e)
return {}
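Together these two helpers define the JSON contract that drives the tree recursion in `_research`: `sufficiency_check` returns the verdict described by the `sufficiency_check` template below, and when it is insufficient its `missing_information` feeds `multi_queries_gen`, whose `questions` array becomes the child nodes. A sketch of the expected shapes, with illustrative values borrowed from the Spielberg example in the removed REASON_PROMPT:

```python
# Shape produced by sufficiency_check (see the sufficiency_check prompt template):
suff = {
    "is_sufficient": False,
    "reasoning": "The docs identify the director of Jaws but not his nationality.",
    "missing_information": ["Steven Spielberg's nationality"],
}

# Shape produced by multi_queries_gen (see the multi_queries_gen prompt template);
# each entry becomes one child _research task at depth - 1:
succ_question_info = {
    "reasoning": "Target the missing nationality fact with two phrasings.",
    "questions": [
        {"question": "Where is Steven Spielberg from?", "query": "Steven Spielberg nationality"},
        {"question": "In which country was Steven Spielberg born?", "query": "Spielberg birthplace country"},
    ],
}
```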

View File

@ -0,0 +1,41 @@
You are a query optimization expert.
The user's original query failed to retrieve sufficient information;
please generate several complementary, improved questions and corresponding queries.
Original query:
{{ original_query }}
Original question:
{{ original_question }}
Currently retrieved content:
{{ retrieved_docs }}
Missing information:
{{ missing_info }}
Please generate 2-3 complementary queries to help find the missing information. These queries should:
1. Focus on different pieces of the missing information.
2. Use varied phrasings.
3. Avoid being identical to the original query.
4. Remain concise and clear.
Output format (JSON):
```json
{
"reasoning": "Explanation of query generation strategy",
"questions": [
{"question": "Improved question 1", "query": "Improved query 1"},
{"question": "Improved question 2", "query": "Improved query 2"},
{"question": "Improved question 3", "query": "Improved query 3"}
]
}
```
Requirements:
1. The questions array contains 1-3 questions and their corresponding queries.
2. Each question is between 5 and 200 characters long.
3. Each query is 1-5 keywords.
4. Each query MUST be in the same language as the retrieved content.
5. DO NOT generate a question or query that is similar to the original query.
6. The reasoning explains the generation strategy.

View File

@ -0,0 +1,24 @@
You are an information retrieval evaluation expert. Please assess whether the currently retrieved content is sufficient to answer the user's question.
User question:
{{ question }}
Retrieved content:
{{ retrieved_docs }}
Please determine whether this content is sufficient to answer the user's question.
Output format (JSON):
```json
{
"is_sufficient": true/false,
"reasoning": "Your reasoning for the judgment",
"missing_information": ["Missing information 1", "Missing information 2"]
}
```
Requirements:
1. If the retrieved content contains the key information needed to answer the query, judge it as sufficient (true).
2. If key information is missing, judge it as insufficient (false) and list the missing information.
3. The `reasoning` should be concise and clear.
4. The `missing_information` should only be filled in when insufficient; otherwise it should be an empty array.

uv.lock (generated): 1783 lines changed; file diff suppressed because it is too large.