Mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refa: change LLM chat output from full to delta (incremental) (#6534)
### What problem does this PR solve?

Change LLM chat output from full to delta (incremental).

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -324,15 +324,18 @@ class LLMBundle:
|
||||
if self.langfuse:
|
||||
generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})
|
||||
|
||||
output = ""
|
||||
ans = ""
|
||||
for txt in self.mdl.chat_streamly(system, history, gen_conf):
|
||||
if isinstance(txt, int):
|
||||
if self.langfuse:
|
||||
generation.end(output={"output": output})
|
||||
generation.end(output={"output": ans})
|
||||
|
||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, txt, self.llm_name):
|
||||
logging.error("LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
|
||||
return
|
||||
return ans
|
||||
|
||||
output = txt
|
||||
yield txt
|
||||
if txt.endswith("</think>"):
|
||||
ans = ans.rstrip("</think>")
|
||||
|
||||
ans += txt
|
||||
yield ans
|
||||
|
||||
Reference in New Issue
Block a user