Fix: incorrect async chat streamly output (#11679)

### What problem does this PR solve?

`async_chat_streamly` yielded raw chunks instead of the accumulated answer, producing incorrect streaming output. Fixes #11677.
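
For reference, a minimal sketch of the behaviour the fix restores, using a hypothetical stand-in generator (not part of this PR): each yielded value is the accumulated answer so far, not a raw chunk.

```python
import asyncio


async def fake_stream():
    # Stand-in for the fixed generator: it accumulates chunks and yields the
    # running answer, mirroring the `ans += txt; yield ans` change in this PR.
    ans = ""
    for txt in ["Hel", "lo, ", "world"]:
        ans += txt
        yield ans


async def main():
    answer = ""
    async for delta in fake_stream():
        answer = delta  # each item is the answer-so-far
    print(answer)  # -> Hello, world


asyncio.run(main())
```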

Disable beartype's import-time type checking as a workaround for #11666.
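
For context, `beartype_this_package()` installs an import hook that adds runtime type checking to every function in the package. The sketch below uses the plain `@beartype` decorator instead (my substitution, only to keep the example self-contained) to show the kind of call-time error such checking raises.

```python
from beartype import beartype
from beartype.roar import BeartypeCallHintParamViolation


@beartype
def greet(name: str) -> str:
    return f"Hello, {name}"


try:
    greet(42)  # wrong argument type: beartype rejects the call at runtime
except BeartypeCallHintParamViolation as exc:
    print(type(exc).__name__)
```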

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: Yongteng Lei
Date: 2025-12-03 11:15:45 +08:00
Committed by: GitHub
Parent: 83fac6d0a0
Commit: 5c81e01de5
5 changed files with 17 additions and 8 deletions


@@ -14,5 +14,5 @@
 # limitations under the License.
 #
-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()


@@ -14,5 +14,5 @@
 # limitations under the License.
 #
-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()


@@ -385,6 +385,7 @@ class LLMBundle(LLM4Tenant):
     async def async_chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
         total_tokens = 0
+        ans = ""
         if self.is_tools and self.mdl.is_tools:
             stream_fn = getattr(self.mdl, "async_chat_streamly_with_tools", None)
         else:
@@ -397,7 +398,15 @@ class LLMBundle(LLM4Tenant):
             if isinstance(txt, int):
                 total_tokens = txt
                 break
-            yield txt
+            if txt.endswith("</think>"):
+                ans = ans[: -len("</think>")]
+            if not self.verbose_tool_use:
+                txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
+            ans += txt
+            yield ans
         if total_tokens and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, total_tokens, self.llm_name):
             logging.error("LLMBundle.async_chat_streamly can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, total_tokens))
         return
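
A standalone demo of the `<tool_call>` stripping used above; the sample text is made up, and `re.DOTALL` lets the pattern span multi-line tool calls.

```python
import re

txt = 'The answer is 42.<tool_call>\n{"name": "search"}\n</tool_call> Done.'
clean = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
print(clean)  # -> The answer is 42. Done.
```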


@@ -14,5 +14,5 @@
 # limitations under the License.
 #
-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()


@@ -37,4 +37,4 @@ __all__ = [
     "Document",
     "Chunk",
     "Agent"
 ]