Revert "Refa: make RAGFlow more asynchronous 2" (#11669)

Reverts infiniflow/ragflow#11664
2025-12-08 20:42:30 +08:00 · 2025-12-02 19:42:05 +08:00
parent 1388c4420d
commit a6681d6366
14 changed files with 292 additions and 622 deletions
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@ -13,14 +13,12 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-import asyncio
 import json
 import logging
 import os
 import re
-import threading
 from copy import deepcopy
-from typing import Any, Generator, AsyncGenerator
+from typing import Any, Generator
 import json_repair
 from functools import partial
 from common.constants import LLMType
@ -173,13 +171,6 @@ class LLM(ComponentBase):
            return self.chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)
        return self.chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)

-    async def _generate_async(self, msg: list[dict], **kwargs) -> str:
-        if not self.imgs and hasattr(self.chat_mdl, "async_chat"):
-            return await self.chat_mdl.async_chat(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)
-        if self.imgs and hasattr(self.chat_mdl, "async_chat"):
-            return await self.chat_mdl.async_chat(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)
-        return await asyncio.to_thread(self._generate, msg, **kwargs)
-
    def _generate_streamly(self, msg:list[dict], **kwargs) -> Generator[str, None, None]:
        ans = ""
        last_idx = 0
@ -214,70 +205,6 @@ class LLM(ComponentBase):
            for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs):
                yield delta(txt)

-    async def _generate_streamly_async(self, msg: list[dict], **kwargs) -> AsyncGenerator[str, None]:
-        # Prefer async chat_streamly if available
-        async def delta_wrapper(txt_iter):
-            ans = ""
-            last_idx = 0
-            endswith_think = False
-
-            def delta(txt):
-                nonlocal ans, last_idx, endswith_think
-                delta_ans = txt[last_idx:]
-                ans = txt
-
-                if delta_ans.find("<think>") == 0:
-                    last_idx += len("<think>")
-                    return "<think>"
-                elif delta_ans.find("<think>") > 0:
-                    delta_ans = txt[last_idx:last_idx + delta_ans.find("<think>")]
-                    last_idx += delta_ans.find("<think>")
-                    return delta_ans
-                elif delta_ans.endswith("</think>"):
-                    endswith_think = True
-                elif endswith_think:
-                    endswith_think = False
-                    return "</think>"
-
-                last_idx = len(ans)
-                if ans.endswith("</think>"):
-                    last_idx -= len("</think>")
-                return re.sub(r"(<think>|</think>)", "", delta_ans)
-
-            async for t in txt_iter:
-                yield delta(t)
-
-        if not self.imgs and hasattr(self.chat_mdl, "async_chat_streamly"):
-            async for t in delta_wrapper(self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)):
-                yield t
-            return
-        if self.imgs and hasattr(self.chat_mdl, "async_chat_streamly"):
-            async for t in delta_wrapper(self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)):
-                yield t
-            return
-
-        # Fallback: run sync stream in thread, bridge results
-        loop = asyncio.get_running_loop()
-        queue: asyncio.Queue = asyncio.Queue()
-
-        def worker():
-            try:
-                for item in self._generate_streamly(msg, **kwargs):
-                    loop.call_soon_threadsafe(queue.put_nowait, item)
-            except Exception as e:
-                loop.call_soon_threadsafe(queue.put_nowait, e)
-            finally:
-                loop.call_soon_threadsafe(queue.put_nowait, StopAsyncIteration)
-
-        threading.Thread(target=worker, daemon=True).start()
-        while True:
-            item = await queue.get()
-            if item is StopAsyncIteration:
-                break
-            if isinstance(item, Exception):
-                raise item
-            yield item
-
    async def _stream_output_async(self, prompt, msg):
        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
        answer = ""