Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
# add stream chat (#811)

### What problem does this PR solve?

#709

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
**api/db/services/dialog_service.py**

```diff
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 import re
+from copy import deepcopy
 
 from api.db import LLMType
 from api.db.db_models import Dialog, Conversation
```
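The new `deepcopy` import serves the `refs = deepcopy(kbinfos)` call later in this diff: `decorate_answer` copies the retrieval results before stripping their `vector` fields, so `kbinfos` itself stays intact. A minimal sketch of why a shallow copy would not suffice (the data shapes below are illustrative, not ragflow's exact schema):

```python
from copy import copy, deepcopy

kbinfos = {"chunks": [{"content_ltks": "...", "vector": [0.1, 0.2]}]}

shallow = copy(kbinfos)
del shallow["chunks"][0]["vector"]        # inner dicts are shared with kbinfos
print("vector" in kbinfos["chunks"][0])   # False -- the original was clobbered

kbinfos = {"chunks": [{"content_ltks": "...", "vector": [0.1, 0.2]}]}
deep = deepcopy(kbinfos)
del deep["chunks"][0]["vector"]           # inner dicts are copied too
print("vector" in kbinfos["chunks"][0])   # True -- the original is intact
```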
```diff
@@ -71,7 +72,7 @@ def message_fit_in(msg, max_length=4000):
     return max_length, msg
 
 
-def chat(dialog, messages, **kwargs):
+def chat(dialog, messages, stream=True, **kwargs):
     assert messages[-1]["role"] == "user", "The last content of this conversation is not from user."
     llm = LLMService.query(llm_name=dialog.llm_id)
     if not llm:
```
```diff
@@ -82,7 +83,10 @@ def chat(dialog, messages, **kwargs):
     else: max_tokens = llm[0].max_tokens
     kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids)
     embd_nms = list(set([kb.embd_id for kb in kbs]))
-    assert len(embd_nms) == 1, "Knowledge bases use different embedding models."
+    if len(embd_nms) != 1:
+        if stream:
+            yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
+        return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
 
     questions = [m["content"] for m in messages if m["role"] == "user"]
     embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
```
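Because its body now contains `yield` statements, `chat()` is a generator function in both modes: even `stream=False` callers receive a generator, whose final payload arrives as the `return` value (via `StopIteration.value`, per PEP 380). A hedged sketch of how a caller might drive each mode; `push_to_client`, `dialog`, and `messages` are placeholders, not the API-layer code from this PR:

```python
# Streaming: iterate; each item is a partial {"answer": ..., "reference": ...}.
for delta in chat(dialog, messages, stream=True):
    push_to_client(delta)              # e.g. write one SSE event per partial

# Non-streaming: the final dict travels via StopIteration.value, and a few
# error paths still yield, so drain the generator defensively.
ans = None
gen = chat(dialog, messages, stream=False)
try:
    while True:
        ans = next(gen)                # picks up any yielded error payloads
except StopIteration as stop:
    if stop.value is not None:
        ans = stop.value               # the decorate_answer(...) result
```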
```diff
@@ -94,7 +98,9 @@ def chat(dialog, messages, **kwargs):
     if field_map:
         chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
         ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
-        if ans: return ans
+        if ans:
+            yield ans
+            return
 
     for p in prompt_config["parameters"]:
         if p["key"] == "knowledge":
```
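This is why `if ans: return ans` had to become a `yield` followed by a bare `return`: inside a generator, `return ans` does not hand `ans` to a `for` loop; it only stops iteration (stashing the value on `StopIteration`). A self-contained demonstration:

```python
def return_only():
    return {"answer": "sql result"}   # invisible to plain iteration
    yield                             # unreached; makes this a generator

def yield_then_return():
    yield {"answer": "sql result"}    # delivered to the consumer
    return                            # then stop cleanly

print(list(return_only()))            # []
print(list(yield_then_return()))      # [{'answer': 'sql result'}]
```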
```diff
@@ -118,8 +124,9 @@ def chat(dialog, messages, **kwargs):
         "{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
 
     if not knowledges and prompt_config.get("empty_response"):
-        return {
-            "answer": prompt_config["empty_response"], "reference": kbinfos}
+        if stream:
+            yield {"answer": prompt_config["empty_response"], "reference": kbinfos}
+        return {"answer": prompt_config["empty_response"], "reference": kbinfos}
 
     kwargs["knowledge"] = "\n".join(knowledges)
     gen_conf = dialog.llm_setting
```
```diff
@@ -130,33 +137,45 @@ def chat(dialog, messages, **kwargs):
         gen_conf["max_tokens"] = min(
             gen_conf["max_tokens"],
             max_tokens - used_token_count)
-    answer = chat_mdl.chat(
-        prompt_config["system"].format(
-            **kwargs), msg, gen_conf)
-    chat_logger.info("User: {}|Assistant: {}".format(
-        msg[-1]["content"], answer))
-
-    if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
-        answer, idx = retrievaler.insert_citations(answer,
-                                                   [ck["content_ltks"]
-                                                       for ck in kbinfos["chunks"]],
-                                                   [ck["vector"]
-                                                       for ck in kbinfos["chunks"]],
-                                                   embd_mdl,
-                                                   tkweight=1 - dialog.vector_similarity_weight,
-                                                   vtweight=dialog.vector_similarity_weight)
-        idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
-        recall_docs = [
-            d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
-        if not recall_docs: recall_docs = kbinfos["doc_aggs"]
-        kbinfos["doc_aggs"] = recall_docs
-
-    for c in kbinfos["chunks"]:
-        if c.get("vector"):
-            del c["vector"]
-    if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api")>=0:
-        answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'"
-    return {"answer": answer, "reference": kbinfos}
+    def decorate_answer(answer):
+        nonlocal prompt_config, knowledges, kwargs, kbinfos
+        if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
+            answer, idx = retrievaler.insert_citations(answer,
+                                                       [ck["content_ltks"]
+                                                           for ck in kbinfos["chunks"]],
+                                                       [ck["vector"]
+                                                           for ck in kbinfos["chunks"]],
+                                                       embd_mdl,
+                                                       tkweight=1 - dialog.vector_similarity_weight,
+                                                       vtweight=dialog.vector_similarity_weight)
+            idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
+            recall_docs = [
+                d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
+            if not recall_docs: recall_docs = kbinfos["doc_aggs"]
+            kbinfos["doc_aggs"] = recall_docs
+
+        refs = deepcopy(kbinfos)
+        for c in refs["chunks"]:
+            if c.get("vector"):
+                del c["vector"]
+        if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api")>=0:
+            answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'"
+        return {"answer": answer, "reference": refs}
+
+    if stream:
+        answer = ""
+        for ans in chat_mdl.chat_streamly(prompt_config["system"].format(**kwargs), msg, gen_conf):
+            answer = ans
+            yield {"answer": answer, "reference": {}}
+        yield decorate_answer(answer)
+    else:
+        answer = chat_mdl.chat(
+            prompt_config["system"].format(
+                **kwargs), msg, gen_conf)
+        chat_logger.info("User: {}|Assistant: {}".format(
+            msg[-1]["content"], answer))
+        return decorate_answer(answer)
 
 
 def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
```
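The core of the refactor: post-processing (citation insertion, vector stripping, the API-key hint) used to run inline on the single blocking answer; it now lives in the `decorate_answer` closure so the streaming and blocking branches can share it. The streaming branch yields reference-free partials and finishes with one decorated payload. A stripped-down sketch of the same control flow, with the model and citation logic faked (names below are illustrative, not ragflow code):

```python
from copy import deepcopy

def chat_sketch(prompt, stream=True):
    """Toy version of the new control flow: one decorate closure, two exits."""
    kbinfos = {"chunks": [{"text": "ctx", "vector": [0.1]}], "doc_aggs": []}

    def decorate_answer(answer):
        # Runs once, on the complete text: attach citations, strip vectors.
        refs = deepcopy(kbinfos)                 # keep kbinfos itself intact
        for c in refs["chunks"]:
            c.pop("vector", None)
        return {"answer": answer + " [ID:0]", "reference": refs}

    def fake_llm_stream():
        # Stand-in for chat_mdl.chat_streamly: yields growing partial answers.
        yield "Hel"
        yield "Hello, wor"
        yield "Hello, world."

    if stream:
        answer = ""
        for ans in fake_llm_stream():
            answer = ans
            yield {"answer": answer, "reference": {}}  # partials: no refs yet
        yield decorate_answer(answer)                  # final, citation-bearing
    else:
        answer = "Hello, world."                       # stand-in for chat_mdl.chat
        return decorate_answer(answer)                 # via StopIteration.value

for chunk in chat_sketch("hi"):
    print(chunk)
```

The partials deliberately carry an empty `reference` so a client can render text as it arrives and attach citations only once the final payload lands.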
**api/db/services/document_service.py**

```diff
@@ -43,7 +43,7 @@ class DocumentService(CommonService):
             docs = cls.model.select().where(
                 (cls.model.kb_id == kb_id),
                 (fn.LOWER(cls.model.name).contains(keywords.lower()))
-                )
+            )
         else:
             docs = cls.model.select().where(cls.model.kb_id == kb_id)
         count = docs.count()
```
```diff
@@ -75,7 +75,7 @@ class DocumentService(CommonService):
     def delete(cls, doc):
         e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
         if not KnowledgebaseService.update_by_id(
-                kb.id, {"doc_num": kb.doc_num - 1}):
+                kb.id, {"doc_num": max(0, kb.doc_num - 1)}):
             raise RuntimeError("Database error (Knowledgebase)!")
         return cls.delete_by_id(doc.id)
 
```
**api/db/services/llm_service.py**

```diff
@@ -172,8 +172,18 @@ class LLMBundle(object):
 
     def chat(self, system, history, gen_conf):
         txt, used_tokens = self.mdl.chat(system, history, gen_conf)
-        if TenantLLMService.increase_usage(
+        if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens, self.llm_name):
             database_logger.error(
                 "Can't update token usage for {}/CHAT".format(self.tenant_id))
         return txt
+
+    def chat_streamly(self, system, history, gen_conf):
+        for txt in self.mdl.chat_streamly(system, history, gen_conf):
+            if isinstance(txt, int):
+                if not TenantLLMService.increase_usage(
+                        self.tenant_id, self.llm_type, txt, self.llm_name):
+                    database_logger.error(
+                        "Can't update token usage for {}/CHAT".format(self.tenant_id))
+                return
+            yield txt
```
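Besides adding `chat_streamly`, this hunk fixes an inverted condition in `chat()`: the usage error is now logged when `increase_usage` fails rather than when it succeeds. The new `chat_streamly` relies on a convention visible in the `isinstance(txt, int)` check: the underlying model wrapper's generator yields text chunks and then an `int` carrying the token count, which `LLMBundle` consumes for usage accounting instead of forwarding. A small sketch of that contract; `fake_model_stream` and `record_usage` are stand-ins, not ragflow code:

```python
def fake_model_stream(system, history, gen_conf):
    # Convention assumed by LLMBundle.chat_streamly: text chunks, then an int.
    yield "Hello"
    yield "Hello, world"
    yield 7                          # total tokens used; terminates the stream

def bundle_stream(system, history, gen_conf, record_usage):
    for txt in fake_model_stream(system, history, gen_conf):
        if isinstance(txt, int):     # sentinel: token count, not answer text
            record_usage(txt)
            return                   # consumed here, never forwarded
        yield txt

for chunk in bundle_stream("sys", [], {}, record_usage=lambda n: print("tokens:", n)):
    print(chunk)
# Hello / Hello, world / tokens: 7
```

Using a type sentinel keeps the wrapper interface to a single generator, at the cost of never being able to stream a literal integer as answer text.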