Fix: incorrect async chat streamly output (#11679)

### What problem does this PR solve?

Fixes incorrect streamed output from the async chat path (`async_chat_streamly`). See #11677.

Also disables beartype for #11666.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-04 03:25:30 +08:00 · 2025-12-03 11:15:45 +08:00
parent 83fac6d0a0
commit 5c81e01de5
5 changed files with 17 additions and 8 deletions
--- a/agent/__init__.py
+++ b/agent/__init__.py
@@ -14,5 +14,5 @@
 #  limitations under the License.
 #

-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -14,5 +14,5 @@
 #  limitations under the License.
 #

-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -385,6 +385,7 @@ class LLMBundle(LLM4Tenant):

    async def async_chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
        total_tokens = 0
+        ans = ""
        if self.is_tools and self.mdl.is_tools:
            stream_fn = getattr(self.mdl, "async_chat_streamly_with_tools", None)
        else:
@@ -397,7 +398,15 @@ class LLMBundle(LLM4Tenant):
                if isinstance(txt, int):
                    total_tokens = txt
                    break
-                yield txt
+
+                if txt.endswith("</think>"):
+                    ans = ans[: -len("</think>")]
+
+                if not self.verbose_tool_use:
+                    txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
+
+                ans += txt
+                yield ans
            if total_tokens and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, total_tokens, self.llm_name):
                logging.error("LLMBundle.async_chat_streamly can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, total_tokens))
            return
--- a/rag/__init__.py
+++ b/rag/__init__.py
@@ -14,5 +14,5 @@
 #  limitations under the License.
 #

-from beartype.claw import beartype_this_package
-beartype_this_package()
+# from beartype.claw import beartype_this_package
+# beartype_this_package()
--- a/sdk/python/ragflow_sdk/__init__.py
+++ b/sdk/python/ragflow_sdk/__init__.py
@@ -37,4 +37,4 @@ __all__ = [
    "Document",
    "Chunk",
    "Agent"
-]
+]