Feat: support tree-structured deep-research policy. (#12559)

### What problem does this PR solve?

#12558
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
Kevin Hu · 2026-01-13 09:41:35 +08:00 · committed by GitHub
parent 867ec94258 · commit 44bada64c9
15 changed files with 1166 additions and 1381 deletions


```diff
@@ -38,7 +38,7 @@ def get_value(d, k1, k2):

 def chunks_format(reference):
-    if not reference or (reference is not dict):
+    if not reference or not isinstance(reference, dict):
         return []
     return [
         {
```
```diff
@@ -485,20 +485,26 @@ async def gen_meta_filter(chat_mdl, meta_data: dict, query: str) -> dict:
         return {"conditions": []}

-async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf=None):
+async def gen_json(system_prompt: str, user_prompt: str, chat_mdl, gen_conf={}, max_retry=2):
     from graphrag.utils import get_llm_cache, set_llm_cache
     cached = get_llm_cache(chat_mdl.llm_name, system_prompt, user_prompt, gen_conf)
     if cached:
         return json_repair.loads(cached)
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
-    ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
-    ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
-    try:
-        res = json_repair.loads(ans)
-        set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
-        return res
-    except Exception:
-        logging.exception(f"Loading json failure: {ans}")
+    err = ""
+    ans = ""
+    for _ in range(max_retry):
+        if ans and err:
+            msg[-1]["content"] += f"\nGenerated JSON is as following:\n{ans}\nBut exception while loading:\n{err}\nPlease reconsider and correct it."
+        ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:], gen_conf=gen_conf)
+        ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
+        try:
+            res = json_repair.loads(ans)
+            set_llm_cache(chat_mdl.llm_name, system_prompt, ans, user_prompt, gen_conf)
+            return res
+        except Exception as e:
+            logging.exception(f"Loading json failure: {ans}")
+            err += str(e)

 TOC_DETECTION = load_prompt("toc_detection")
```
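For reference, a hedged usage sketch of the updated `gen_json` signature; `chat_mdl` is any model wrapper exposing `async_chat`, and the prompt strings below are invented:

```python
async def demo(chat_mdl):
    # Up to max_retry attempts: on a malformed reply, gen_json feeds the bad
    # output and the parse error back to the model so it can self-correct.
    result = await gen_json(
        system_prompt='Classify the sentiment. Reply with JSON: {"label": "..."}',
        user_prompt="Output:\n",
        chat_mdl=chat_mdl,
        gen_conf={"temperature": 0.0},
        max_retry=3,
    )
    # Returns the parsed object on success; as written above, the loop falls
    # through and yields None if every attempt fails to parse.
    print(result)
```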
```diff
@@ -847,8 +853,6 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):

 TOC_RELEVANCE_SYSTEM = load_prompt("toc_relevance_system")
 TOC_RELEVANCE_USER = load_prompt("toc_relevance_user")

 async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn: int = 6):
     import numpy as np
     try:
```
```diff
@@ -876,8 +880,6 @@ async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn:

 META_DATA = load_prompt("meta_data")

 async def gen_metadata(chat_mdl, schema: dict, content: str):
     template = PROMPT_JINJA_ENV.from_string(META_DATA)
     for k, desc in schema["properties"].items():
```
```diff
@@ -890,3 +892,34 @@ async def gen_metadata(chat_mdl, schema: dict, content: str):
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
     ans = await chat_mdl.async_chat(msg[0]["content"], msg[1:])
     return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
+
+
+SUFFICIENCY_CHECK = load_prompt("sufficiency_check")
+
+
+async def sufficiency_check(chat_mdl, question: str, ret_content: str):
+    try:
+        return await gen_json(
+            PROMPT_JINJA_ENV.from_string(SUFFICIENCY_CHECK).render(question=question, retrieved_docs=ret_content),
+            "Output:\n",
+            chat_mdl
+        )
+    except Exception as e:
+        logging.exception(e)
+        return {}
+
+
+MULTI_QUERIES_GEN = load_prompt("multi_queries_gen")
+
+
+async def multi_queries_gen(chat_mdl, question: str, query: str, missing_infos: list[str], ret_content: str):
+    try:
+        return await gen_json(
+            PROMPT_JINJA_ENV.from_string(MULTI_QUERIES_GEN).render(
+                original_question=question,
+                original_query=query,
+                missing_info="\n - ".join(missing_infos),
+                retrieved_docs=ret_content
+            ),
+            "Output:\n",
+            chat_mdl
+        )
+    except Exception as e:
+        logging.exception(e)
+        return {}
```
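Taken together, `sufficiency_check` and `multi_queries_gen` suggest how the tree-structured policy can branch: test whether retrieval suffices, and if not, fan out into complementary sub-queries and recurse. The sketch below is an illustration under assumptions, not the PR's orchestration code: `retrieve` is a hypothetical async callable returning concatenated chunk text, and the depth cap is invented.

```python
import asyncio

async def research_node(chat_mdl, retrieve, question: str, query: str,
                        depth: int = 0, max_depth: int = 2) -> list[str]:
    docs = await retrieve(query)  # hypothetical retriever: query -> chunk text
    check = await sufficiency_check(chat_mdl, question, docs) or {}
    if check.get("is_sufficient") or depth >= max_depth:
        return [docs]  # leaf: enough evidence, or the depth budget is spent
    # Insufficient: generate complementary queries and expand one child each.
    gen = await multi_queries_gen(
        chat_mdl, question, query, check.get("missing_information", []), docs
    ) or {}
    children = [
        research_node(chat_mdl, retrieve, q["question"], q["query"], depth + 1, max_depth)
        for q in gen.get("questions", [])
    ]
    results = await asyncio.gather(*children)
    return [docs] + [doc for branch in results for doc in branch]
```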

New file (the multi_queries_gen prompt template):

@@ -0,0 +1,41 @@
You are a query optimization expert.
The user's original query failed to retrieve sufficient information, so please generate multiple complementary improved questions and corresponding queries.
Original query:
{{ original_query }}
Original question:
{{ original_question }}
Currently retrieved content:
{{ retrieved_docs }}
Missing information:
{{ missing_info }}
Please generate 2-3 complementary queries to help find the missing information. These queries should:
1. Focus on different missing information points.
2. Use different expressions.
3. Avoid being identical to the original query.
4. Remain concise and clear.
Output format (JSON):
```json
{
"reasoning": "Explanation of query generation strategy",
"questions": [
{"question": "Improved question 1", "query": "Improved query 1"},
{"question": "Improved question 2", "query": "Improved query 2"},
{"question": "Improved question 3", "query": "Improved query 3"}
]
}
```
Requirements:
1. The questions array contains 1-3 questions with their corresponding queries.
2. Each question is between 5 and 200 characters long.
3. Each query is between 1 and 5 keywords long.
4. Each query MUST be in the same language as the retrieved content.
5. DO NOT generate questions or queries that are similar to the original query.
6. The reasoning explains the generation strategy.
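To make the contract concrete, a small sketch of rendering this template with Jinja2; the repo loads the full text via `load_prompt("multi_queries_gen")` and renders it through `PROMPT_JINJA_ENV`, so the truncated template string and sample values below are stand-ins:

```python
from jinja2 import Environment

# Truncated stand-in for this prompt file; variable names match the template.
TEMPLATE = (
    "Original query:\n{{ original_query }}\n"
    "Original question:\n{{ original_question }}\n"
    "Currently retrieved content:\n{{ retrieved_docs }}\n"
    "Missing information:\n{{ missing_info }}\n"
)

prompt = Environment().from_string(TEMPLATE).render(
    original_query="ragflow deep research",
    original_question="How does RAGFlow implement tree-structured deep research?",
    retrieved_docs="<top-k chunk text>",
    missing_info="\n - ".join(["branching policy", "stopping criterion"]),
)
print(prompt)
# A well-formed model reply then parses (via gen_json) to:
# {"reasoning": "...", "questions": [{"question": "...", "query": "..."}, ...]}
```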

New file (the sufficiency_check prompt template):

@@ -0,0 +1,24 @@
You are an information retrieval evaluation expert. Please assess whether the currently retrieved content is sufficient to answer the user's question.
User question:
{{ question }}
Retrieved content:
{{ retrieved_docs }}
Please determine whether this content is sufficient to answer the user's question.
Output format (JSON):
```json
{
"is_sufficient": true/false,
"reasoning": "Your reasoning for the judgment",
"missing_information": ["Missing information 1", "Missing information 2"]
}
```
Requirements:
1. If the retrieved content contains the key information needed to answer the query, judge it as sufficient (true).
2. If key information is missing, judge it as insufficient (false) and list the missing information.
3. The `reasoning` should be concise and clear.
4. The `missing_information` should be filled only when the content is insufficient; otherwise it should be an empty array.
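A small sketch of consuming the JSON this prompt specifies; the helper name and defaults are assumptions, not part of the PR:

```python
def parse_sufficiency(result: dict | None) -> tuple[bool, list[str]]:
    # sufficiency_check returns {} on failure, so default to "insufficient"
    # with no hints rather than raising.
    result = result or {}
    sufficient = bool(result.get("is_sufficient", False))
    missing = result.get("missing_information") or []
    # Per requirement 4, missing_information is only meaningful when insufficient.
    return sufficient, [] if sufficient else [str(m) for m in missing]
```

Callers can then decide whether to stop expanding the tree or to feed the missing-information list into `multi_queries_gen`.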