Feat: support tree structured deep-research policy. (#12559)

### What problem does this PR solve? #12558 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-01-23 11:36:38 +08:00 · 2026-01-13 09:41:35 +08:00
parent 867ec94258
commit 44bada64c9
15 changed files with 1166 additions and 1381 deletions
--- a/api/db/services/conversation_service.py
+++ b/api/db/services/conversation_service.py
@ -64,6 +64,7 @@ class ConversationService(CommonService):
            offset += limit
        return res

+
 def structure_answer(conv, ans, message_id, session_id):
    reference = ans["reference"]
    if not isinstance(reference, dict):
@ -107,6 +108,7 @@ def structure_answer(conv, ans, message_id, session_id):
            conv.reference[-1] = reference
    return ans

+
 async def async_completion(tenant_id, chat_id, question, name="New session", session_id=None, stream=True, **kwargs):
    assert name, "`name` can not be empty."
    dia = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@ -13,6 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import asyncio
 import binascii
 import logging
 import re
@ -23,7 +24,6 @@ from functools import partial
 from timeit import default_timer as timer
 from langfuse import Langfuse
 from peewee import fn
-from agentic_reasoning import DeepResearcher
 from api.db.services.file_service import FileService
 from common.constants import LLMType, ParserType, StatusEnum
 from api.db.db_models import DB, Dialog
@ -36,6 +36,7 @@ from common.metadata_utils import apply_meta_data_filter
 from api.db.services.tenant_llm_service import TenantLLMService
 from common.time_utils import current_timestamp, datetime_format
 from graphrag.general.mind_map_extractor import MindMapExtractor
+from rag.advanced_rag import DeepResearcher
 from rag.app.resume import forbidden_select_fields4resume
 from rag.app.tag import label_question
 from rag.nlp.search import index_name
@ -380,16 +381,35 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
                    doc_ids=attachments,
                ),
            )
+            queue = asyncio.Queue()
+            async def callback(msg:str):
+                nonlocal queue
+                await queue.put(msg + "<br/>")

+            await callback("<START_DEEP_RESEARCH>")
+            task = asyncio.create_task(reasoner.research(kbinfos, questions[-1], questions[-1], callback=callback))
+            while True:
+                msg = await queue.get()
+                if msg.find("<START_DEEP_RESEARCH>") == 0:
+                    yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "start_to_think": True}
+                elif msg.find("<END_DEEP_RESEARCH>") == 0:
+                    yield {"answer": "", "reference": {}, "audio_binary": None, "final": False, "end_to_think": True}
+                    break
+                else:
+                    yield {"answer": msg, "reference": {}, "audio_binary": None, "final": False}
+
+            await task
+            '''
            async for think in reasoner.thinking(kbinfos, attachments_ + " ".join(questions)):
                if isinstance(think, str):
                    thought = think
                    knowledges = [t for t in think.split("\n") if t]
                elif stream:
                    yield think
+            '''
        else:
            if embd_mdl:
-                kbinfos = retriever.retrieval(
+                kbinfos = await asyncio.to_thread(retriever.retrieval,
                    " ".join(questions),
                    embd_mdl,
                    tenant_ids,
@ -420,8 +440,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
                if ck["content_with_weight"]:
                    kbinfos["chunks"].insert(0, ck)

-            knowledges = kb_prompt(kbinfos, max_tokens)
-
+    knowledges = kb_prompt(kbinfos, max_tokens)
    logging.debug("{}->{}".format(" ".join(questions), "\n->".join(knowledges)))

    retrieval_ts = timer()