Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
Refa: clean up synchronous functions in chat_model and make conversation and dialog chats asynchronous (#11779)

### What problem does this PR solve?

Clean up synchronous functions in chat_model and make conversation and dialog chats asynchronous: the blocking `chat()`/`ask()` generators are replaced by `async_chat()`/`async_ask()` async generators, and their call sites switch from `for` to `async for`.

### Type of change

- [x] Refactoring
- [x] Performance Improvement
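The core pattern of the refactor is replacing blocking generator functions with async generators. A minimal, self-contained sketch of the pattern (names here are illustrative, not ragflow's actual APIs):

```python
import asyncio
from typing import AsyncGenerator


def chat_sync(question: str):
    # Old style: a blocking generator; the event loop stalls while
    # each chunk is produced.
    for token in ("Hello", ", ", "world"):
        yield token


async def async_chat_demo(question: str) -> AsyncGenerator[str, None]:
    # New style: an async generator; each await hands control back
    # to the event loop between chunks.
    for token in ("Hello", ", ", "world"):
        await asyncio.sleep(0)  # stand-in for an awaited LLM/network call
        yield token


async def main():
    # Call sites change from `for ans in chat(...)`
    # to `async for ans in async_chat(...)`.
    async for token in async_chat_demo("hi"):
        print(token, end="")
    print()


asyncio.run(main())
```
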
@@ -23,7 +23,7 @@ from quart import Response, request
 from api.apps import current_user, login_required
 from api.db.db_models import APIToken
 from api.db.services.conversation_service import ConversationService, structure_answer
-from api.db.services.dialog_service import DialogService, ask, chat, gen_mindmap
+from api.db.services.dialog_service import DialogService, async_ask, async_chat, gen_mindmap
 from api.db.services.llm_service import LLMBundle
 from api.db.services.search_service import SearchService
 from api.db.services.tenant_llm_service import TenantLLMService

@@ -218,10 +218,10 @@ async def completion():
         dia.llm_setting = chat_model_config

     is_embedded = bool(chat_model_id)
-    def stream():
+    async def stream():
         nonlocal dia, msg, req, conv
         try:
-            for ans in chat(dia, msg, True, **req):
+            async for ans in async_chat(dia, msg, True, **req):
                 ans = structure_answer(conv, ans, message_id, conv.id)
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
             if not is_embedded:

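Because `stream()` above is now an async generator, the surrounding Quart view can hand it straight to a streaming response. A minimal sketch of that server-sent-events wiring, with a hypothetical `fake_chat` standing in for `async_chat`:

```python
import json

from quart import Quart, Response

app = Quart(__name__)


async def fake_chat():
    # Stand-in for async_chat(): yields progressively built answers.
    for part in ({"answer": "Hel"}, {"answer": "Hello"}):
        yield part


@app.route("/sse-demo")
async def sse_demo():
    async def stream():
        async for ans in fake_chat():
            # Same SSE framing as the handler above: "data:<json>\n\n".
            yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"

    # Quart accepts an async generator as a streaming response body.
    return Response(stream(), mimetype="text/event-stream")
```
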
@@ -241,7 +241,7 @@ async def completion():

     else:
         answer = None
-        for ans in chat(dia, msg, **req):
+        async for ans in async_chat(dia, msg, **req):
             answer = structure_answer(conv, ans, message_id, conv.id)
             if not is_embedded:
                 ConversationService.update_by_id(conv.id, conv.to_dict())

@@ -406,10 +406,10 @@ async def ask_about():
     if search_app:
         search_config = search_app.get("search_config", {})

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"

@@ -34,8 +34,9 @@ async def set_api_key():
     if not all([secret_key, public_key, host]):
         return get_error_data_result(message="Missing required fields")

+    current_user_id = current_user.id
     langfuse_keys = dict(
-        tenant_id=current_user.id,
+        tenant_id=current_user_id,
         secret_key=secret_key,
         public_key=public_key,
         host=host,

@@ -45,23 +46,24 @@ async def set_api_key():
     if not langfuse.auth_check():
         return get_error_data_result(message="Invalid Langfuse keys")

-    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user.id)
+    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user_id)
     with DB.atomic():
         try:
             if not langfuse_entry:
                 TenantLangfuseService.save(**langfuse_keys)
             else:
-                TenantLangfuseService.update_by_tenant(tenant_id=current_user.id, langfuse_keys=langfuse_keys)
+                TenantLangfuseService.update_by_tenant(tenant_id=current_user_id, langfuse_keys=langfuse_keys)
             return get_json_result(data=langfuse_keys)
         except Exception as e:
-            server_error_response(e)
+            return server_error_response(e)


 @manager.route("/api_key", methods=["GET"])  # noqa: F821
 @login_required
 @validate_request()
 def get_api_key():
-    langfuse_entry = TenantLangfuseService.filter_by_tenant_with_info(tenant_id=current_user.id)
+    current_user_id = current_user.id
+    langfuse_entry = TenantLangfuseService.filter_by_tenant_with_info(tenant_id=current_user_id)
     if not langfuse_entry:
         return get_json_result(message="Have not record any Langfuse keys.")

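The repeated `server_error_response(e)` to `return server_error_response(e)` change in these Langfuse handlers fixes a fall-through bug: the error response was built but discarded, so the view implicitly returned None. A reduced sketch with a hypothetical helper in place of the real `server_error_response`:

```python
def server_error_response_demo(e: Exception) -> dict:
    # Hypothetical stand-in for ragflow's server_error_response helper.
    return {"code": 500, "message": str(e)}


def handler_buggy():
    try:
        raise RuntimeError("boom")
    except Exception as e:
        server_error_response_demo(e)  # response built, then silently dropped


def handler_fixed():
    try:
        raise RuntimeError("boom")
    except Exception as e:
        return server_error_response_demo(e)  # payload reaches the caller


assert handler_buggy() is None        # the framework would reject a None response
assert handler_fixed()["code"] == 500
```
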
@@ -72,7 +74,7 @@ def get_api_key():
     except langfuse.api.core.api_error.ApiError as api_err:
         return get_json_result(message=f"Error from Langfuse: {api_err}")
     except Exception as e:
-        server_error_response(e)
+        return server_error_response(e)

     langfuse_entry["project_id"] = langfuse.api.projects.get().dict()["data"][0]["id"]
     langfuse_entry["project_name"] = langfuse.api.projects.get().dict()["data"][0]["name"]

@@ -84,7 +86,8 @@ def get_api_key():
 @login_required
 @validate_request()
 def delete_api_key():
-    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user.id)
+    current_user_id = current_user.id
+    langfuse_entry = TenantLangfuseService.filter_by_tenant(tenant_id=current_user_id)
     if not langfuse_entry:
         return get_json_result(message="Have not record any Langfuse keys.")

@@ -93,4 +96,4 @@ def delete_api_key():
         TenantLangfuseService.delete_model(langfuse_entry)
         return get_json_result(data=True)
     except Exception as e:
-        server_error_response(e)
+        return server_error_response(e)

@@ -74,7 +74,7 @@ async def set_api_key():
             assert factory in ChatModel, f"Chat model from {factory} is not supported yet."
             mdl = ChatModel[factory](req["api_key"], llm.llm_name, base_url=req.get("base_url"), **extra)
             try:
-                m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
+                m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
                 if m.find("**ERROR**") >= 0:
                     raise Exception(m)
                 chat_passed = True

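The credential probe now awaits the model's async entry point instead of calling the blocking `chat()`. A sketch with a dummy model standing in for `ChatModel[factory]`, assuming the same `(answer, token_count)` return shape the handler unpacks:

```python
import asyncio


class DemoChatModel:
    # Hypothetical stand-in for ChatModel[factory].
    async def async_chat(self, system, history, gen_conf):
        await asyncio.sleep(0)  # placeholder for the provider round-trip
        return "Hi! I'm doing fine.", 12


async def probe():
    mdl = DemoChatModel()
    m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9, "max_tokens": 50})
    if m.find("**ERROR**") >= 0:
        raise Exception(m)
    return tc


print(asyncio.run(probe()))  # -> 12
```
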
@@ -217,7 +217,7 @@ async def add_llm():
             **extra,
         )
         try:
-            m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9})
+            m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9})
             if not tc and m.find("**ERROR**:") >= 0:
                 raise Exception(m)
         except Exception as e:

@@ -26,9 +26,10 @@ from api.db.db_models import APIToken
 from api.db.services.api_service import API4ConversationService
 from api.db.services.canvas_service import UserCanvasService, completion_openai
 from api.db.services.canvas_service import completion as agent_completion
-from api.db.services.conversation_service import ConversationService, iframe_completion
-from api.db.services.conversation_service import completion as rag_completion
-from api.db.services.dialog_service import DialogService, ask, chat, gen_mindmap, meta_filter
+from api.db.services.conversation_service import ConversationService
+from api.db.services.conversation_service import async_iframe_completion as iframe_completion
+from api.db.services.conversation_service import async_completion as rag_completion
+from api.db.services.dialog_service import DialogService, async_ask, async_chat, gen_mindmap, meta_filter
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle

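Note the aliasing trick in these imports: `async_completion as rag_completion` and `async_iframe_completion as iframe_completion` bind the async implementations to the old names, so downstream code keeps the same identifiers and only changes `for` to `async for`. A toy illustration of the idea:

```python
import asyncio


async def async_completion_demo(tenant_id, chat_id, **req):
    # Stand-in for conversation_service.async_completion.
    yield {"answer": "partial"}
    yield {"answer": "final"}


# Old name, now bound to the async version; call sites stay stable.
rag_completion_demo = async_completion_demo


async def main():
    async for ans in rag_completion_demo("t1", "c1", question="hi"):
        print(ans)


asyncio.run(main())
```
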
@@ -141,7 +142,7 @@ async def chat_completion(tenant_id, chat_id):
         return resp
     else:
         answer = None
-        for ans in rag_completion(tenant_id, chat_id, **req):
+        async for ans in rag_completion(tenant_id, chat_id, **req):
             answer = ans
             break
         return get_result(data=answer)

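The non-streaming branch keeps its `break`-after-first-item idiom, which carries over cleanly to `async for`. A sketch of consuming only the first complete answer from an async generator (the generator here is a stand-in, not the real `rag_completion`):

```python
import asyncio


async def rag_completion_demo(tenant_id, chat_id, **req):
    # Stand-in: yields a complete answer first, then further updates.
    yield {"answer": "full answer", "reference": []}
    yield {"answer": "never consumed"}


async def first_answer():
    answer = None
    async for ans in rag_completion_demo("t1", "c1", question="hi"):
        answer = ans
        break  # one complete item is enough in non-streaming mode
    return answer


print(asyncio.run(first_answer()))
```
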
@@ -245,7 +246,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
     # The usage field on the last chunk contains token usage statistics for the entire request.
     # The choices field on the last chunk will always be an empty array [].
-    def streamed_response_generator(chat_id, dia, msg):
+    async def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         answer_cache = ""
         reasoning_cache = ""

@@ -274,7 +275,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         }

         try:
-            for ans in chat(dia, msg, True, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
+            async for ans in async_chat(dia, msg, True, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
                 last_ans = ans
                 answer = ans["answer"]

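In the OpenAI-compatible stream, the backend yields the full answer so far while OpenAI-style chunks carry only deltas; the generator's `answer_cache` bridges the two. A minimal sketch of that suffix computation (assuming full-answer-so-far semantics, which the `answer_cache` variable above suggests):

```python
def make_delta_encoder():
    answer_cache = ""

    def to_delta(full_answer: str) -> str:
        # Send only the suffix that has not been emitted yet.
        nonlocal answer_cache
        delta = full_answer[len(answer_cache):]
        answer_cache = full_answer
        return delta

    return to_delta


to_delta = make_delta_encoder()
assert to_delta("Hel") == "Hel"
assert to_delta("Hello") == "lo"
```
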
@@ -342,7 +343,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         return resp
     else:
         answer = None
-        for ans in chat(dia, msg, False, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
+        async for ans in async_chat(dia, msg, False, toolcall_session=toolcall_session, tools=tools, quote=need_reference):
             # focus answer content only
             answer = ans
             break

@@ -733,10 +734,10 @@ async def ask_about(tenant_id):
             return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
     uid = tenant_id

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps(

@@ -827,7 +828,7 @@ async def chatbot_completions(dialog_id):
         resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
         return resp

-    for answer in iframe_completion(dialog_id, **req):
+    async for answer in iframe_completion(dialog_id, **req):
         return get_result(data=answer)

@@ -918,10 +919,10 @@ async def ask_about_embedded():
     if search_app := SearchService.get_detail(search_id):
         search_config = search_app.get("search_config", {})

-    def stream():
+    async def stream():
         nonlocal req, uid
         try:
-            for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
+            async for ans in async_ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
             yield "data:" + json.dumps(