Refa: clean up synchronous functions in chat_model and add async support for conversation and dialog chats (#11779)

### What problem does this PR solve?

Clean up the synchronous functions in chat_model and add asynchronous
implementations (`async_chat`, `async_ask`, `async_completion`,
`async_iframe_completion`) for conversation and dialog chats.
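
At its core the refactor replaces blocking generators with async generators that handlers drive via `async for`. A minimal, self-contained sketch of that pattern (names like `fake_token_stream` are illustrative, not RAGFlow's actual API):

```python
import asyncio
from typing import AsyncGenerator

# Stand-in for a streaming LLM client; the real code talks to a model provider.
async def fake_token_stream() -> AsyncGenerator[str, None]:
    for tok in ["Hello", ", ", "world"]:
        await asyncio.sleep(0)  # hand control back to the event loop
        yield tok

# The converted shape: an async generator instead of a blocking one, so a
# single event loop can interleave many concurrent conversations.
async def async_chat(messages: list[dict]) -> AsyncGenerator[dict, None]:
    answer = ""
    async for tok in fake_token_stream():
        answer += tok
        yield {"answer": answer, "reference": []}

async def main() -> None:
    async for ans in async_chat([{"role": "user", "content": "hi"}]):
        print(ans["answer"])

asyncio.run(main())
```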

### Type of change

- [x] Refactoring
- [x] Performance Improvement
Author: Yongteng Lei
Committed: 2025-12-08 09:43:03 +08:00 (by GitHub)
Parent: 9b8971a9de
Commit: 51ec708c58
10 changed files with 421 additions and 843 deletions

@@ -23,7 +23,7 @@ from quart import Response, request
 from api.apps import current_user, login_required
 from api.db.db_models import APIToken
 from api.db.services.conversation_service import ConversationService, structure_answer
-from api.db.services.dialog_service import DialogService, ask, chat, gen_mindmap
+from api.db.services.dialog_service import DialogService, async_ask, async_chat, gen_mindmap
 from api.db.services.llm_service import LLMBundle
 from api.db.services.search_service import SearchService
 from api.db.services.tenant_llm_service import TenantLLMService
@@ -218,10 +218,10 @@ async def completion():
         dia.llm_setting = chat_model_config
     is_embedded = bool(chat_model_id)

-    def stream():
+    async def stream():
         nonlocal dia, msg, req, conv
         try:
-            for ans in chat(dia, msg, True, **req):
+            async for ans in async_chat(dia, msg, True, **req):
                 ans = structure_answer(conv, ans, message_id, conv.id)
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
             if not is_embedded:
@@ -241,7 +241,7 @@ async def completion():
     else:
         answer = None
-        for ans in chat(dia, msg, **req):
+        async for ans in async_chat(dia, msg, **req):
             answer = structure_answer(conv, ans, message_id, conv.id)
             if not is_embedded:
                 ConversationService.update_by_id(conv.id, conv.to_dict())
@@ -406,10 +406,10 @@ async def ask_about():
     if search_app:
         search_config = search_app.get("search_config", {})

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"

@@ -34,8 +34,9 @@ async def set_api_key():
     if not all([secret_key, public_key, host]):
         return get_error_data_result(message="Missing required fields")

+    current_user_id = current_user.id
     langfuse_keys = dict(
-        tenant_id=current_user.id,
+        tenant_id=current_user_id,
         secret_key=secret_key,
         public_key=public_key,
         host=host,
@@ -45,23 +46,24 @@ async def set_api_key():
     if not langfuse.auth_check():
         return get_error_data_result(message="Invalid Langfuse keys")

-    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user.id)
+    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user_id)
     with DB.atomic():
         try:
             if not langfuse_entry:
                 TenantLangfuseService.save(**langfuse_keys)
             else:
-                TenantLangfuseService.update_by_tenant(tenant_id=current_user.id, langfuse_keys=langfuse_keys)
+                TenantLangfuseService.update_by_tenant(tenant_id=current_user_id, langfuse_keys=langfuse_keys)
             return get_json_result(data=langfuse_keys)
         except Exception as e:
-            server_error_response(e)
+            return server_error_response(e)


 @manager.route("/api_key", methods=["GET"])  # noqa: F821
 @login_required
 @validate_request()
 def get_api_key():
-    langfuse_entry = TenantLangfuseService.filter_by_tenant_with_info(tenant_id=current_user.id)
+    current_user_id = current_user.id
+    langfuse_entry = TenantLangfuseService.filter_by_tenant_with_info(tenant_id=current_user_id)

     if not langfuse_entry:
         return get_json_result(message="Have not record any Langfuse keys.")
@@ -72,7 +74,7 @@ def get_api_key():
     except langfuse.api.core.api_error.ApiError as api_err:
         return get_json_result(message=f"Error from Langfuse: {api_err}")
     except Exception as e:
-        server_error_response(e)
+        return server_error_response(e)

     langfuse_entry["project_id"] = langfuse.api.projects.get().dict()["data"][0]["id"]
     langfuse_entry["project_name"] = langfuse.api.projects.get().dict()["data"][0]["name"]
@@ -84,7 +86,8 @@ def get_api_key():
 @login_required
 @validate_request()
 def delete_api_key():
-    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user.id)
+    current_user_id = current_user.id
+    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user_id)

     if not langfuse_entry:
         return get_json_result(message="Have not record any Langfuse keys.")
@@ -93,4 +96,4 @@ def delete_api_key():
         TenantLangfuseService.delete_model(langfuse_entry)
         return get_json_result(data=True)
     except Exception as e:
-        server_error_response(e)
+        return server_error_response(e)
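
Besides caching `current_user.id`, this file fixes a subtle bug: `server_error_response(e)` builds an error response, but the handlers were discarding it, so the framework received `None` after an exception. A toy reproduction of the difference, with `server_error_response` stubbed out:

```python
def server_error_response(e: Exception) -> dict:
    # Stand-in for the real helper, which builds a JSON error response.
    return {"code": 500, "message": str(e)}

def handler_before():
    try:
        raise RuntimeError("boom")
    except Exception as e:
        server_error_response(e)  # response object built, then dropped
    # falls through and implicitly returns None -> framework error

def handler_after():
    try:
        raise RuntimeError("boom")
    except Exception as e:
        return server_error_response(e)  # error response actually reaches the client

print(handler_before())  # None
print(handler_after())   # {'code': 500, 'message': 'boom'}
```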

@@ -74,7 +74,7 @@ async def set_api_key():
     assert factory in ChatModel, f"Chat model from {factory} is not supported yet."
     mdl = ChatModel[factory](req["api_key"], llm.llm_name, base_url=req.get("base_url"), **extra)
     try:
-        m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
+        m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
         if m.find("**ERROR**") >= 0:
             raise Exception(m)
         chat_passed = True
@@ -217,7 +217,7 @@ async def add_llm():
         **extra,
     )
     try:
-        m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9})
+        m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9})
         if not tc and m.find("**ERROR**:") >= 0:
             raise Exception(m)
     except Exception as e:
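
The API-key probe now awaits the model's async entry point instead of blocking the event loop. A sketch of the probe shape, with a dummy class standing in for `ChatModel[factory]`:

```python
import asyncio

class DummyChatModel:
    # Stand-in for a ChatModel implementation; returns (message, token_count).
    def __init__(self, api_key: str, model_name: str):
        self.api_key, self.model_name = api_key, model_name

    async def async_chat(self, system, history: list[dict], gen_conf: dict) -> tuple[str, int]:
        await asyncio.sleep(0)  # where the real HTTP call to the provider would await
        return "I'm doing well, thanks!", 7

async def probe(api_key: str) -> bool:
    mdl = DummyChatModel(api_key, "demo-model")
    m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
    if m.find("**ERROR**") >= 0:
        raise Exception(m)
    return True

print(asyncio.run(probe("sk-test")))  # True
```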

@@ -26,9 +26,10 @@ from api.db.db_models import APIToken
 from api.db.services.api_service import API4ConversationService
 from api.db.services.canvas_service import UserCanvasService, completion_openai
 from api.db.services.canvas_service import completion as agent_completion
-from api.db.services.conversation_service import ConversationService, iframe_completion
-from api.db.services.conversation_service import completion as rag_completion
-from api.db.services.dialog_service import DialogService, ask, chat, gen_mindmap, meta_filter
+from api.db.services.conversation_service import ConversationService
+from api.db.services.conversation_service import async_iframe_completion as iframe_completion
+from api.db.services.conversation_service import async_completion as rag_completion
+from api.db.services.dialog_service import DialogService, async_ask, async_chat, gen_mindmap, meta_filter
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
@@ -141,7 +142,7 @@ async def chat_completion(tenant_id, chat_id):
         return resp
     else:
         answer = None
-        for ans in rag_completion(tenant_id, chat_id, **req):
+        async for ans in rag_completion(tenant_id, chat_id, **req):
             answer = ans
             break
         return get_result(data=answer)
@@ -245,7 +246,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
     # The usage field on the last chunk contains token usage statistics for the entire request.
     # The choices field on the last chunk will always be an empty array [].
-    def streamed_response_generator(chat_id, dia, msg):
+    async def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         answer_cache = ""
         reasoning_cache = ""
@@ -274,7 +275,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         }

         try:
-            for ans in chat(dia, msg, True, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
+            async for ans in async_chat(dia, msg, True, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
                 last_ans = ans
                 answer = ans["answer"]
@@ -342,7 +343,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         return resp
     else:
         answer = None
-        for ans in chat(dia, msg, False, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
+        async for ans in async_chat(dia, msg, False, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
             # focus answer content only
             answer = ans
             break
@@ -733,10 +734,10 @@ async def ask_about(tenant_id):
             return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
     uid = tenant_id

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps(
@@ -827,7 +828,7 @@ async def chatbot_completions(dialog_id):
         resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
         return resp

-    for answer in iframe_completion(dialog_id, **req):
+    async for answer in iframe_completion(dialog_id, **req):
         return get_result(data=answer)
@@ -918,10 +919,10 @@ async def ask_about_embedded():
     if search_app := SearchService.get_detail(search_id):
         search_config = search_app.get("search_config", {})

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps(