Refa: make RAGFlow more asynchronous (#11601)

### What problem does this PR solve?

Make RAGFlow more asynchronous in order to reduce blocking behavior. Verified in chat and agent scenarios. Related issues: #11551, #11579. However, the impact of these changes still requires further investigation to ensure everything works as expected.

### Type of change

- [x] Refactoring
2025-12-08 12:32:30 +08:00 · 2025-12-01 14:24:06 +08:00
parent 6ea4248bdc
commit b6c4722687
36 changed files with 1162 additions and 359 deletions
--- a/agent/canvas.py
+++ b/agent/canvas.py
@ -13,6 +13,9 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import asyncio
+import base64
+import inspect
 import json
 import logging
 import re
@ -79,6 +82,7 @@ class Graph:
        self.dsl = json.loads(dsl)
        self._tenant_id = tenant_id
        self.task_id = task_id if task_id else get_uuid()
+        self._thread_pool = ThreadPoolExecutor(max_workers=5)
        self.load()

    def load(self):
@ -357,6 +361,7 @@ class Canvas(Graph):

    async def run(self, **kwargs):
        st = time.perf_counter()
+        self._loop = asyncio.get_running_loop()
        self.message_id = get_uuid()
        created_at = int(time.time())
        self.add_user_input(kwargs.get("query"))
@ -372,7 +377,7 @@ class Canvas(Graph):
        for k in kwargs.keys():
            if k in ["query", "user_id", "files"] and kwargs[k]:
                if k == "files":
-                    self.globals[f"sys.{k}"] = FileService.get_files(kwargs[k])
+                    self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k])
                else:
                    self.globals[f"sys.{k}"] = kwargs[k]
        if not self.globals["sys.conversation_turns"] :
@ -402,31 +407,39 @@ class Canvas(Graph):
        yield decorate("workflow_started", {"inputs": kwargs.get("inputs")})
        self.retrieval.append({"chunks": {}, "doc_aggs": {}})

-        def _run_batch(f, t):
+        async def _run_batch(f, t):
            if self.is_canceled():
                msg = f"Task {self.task_id} has been canceled during batch execution."
                logging.info(msg)
                raise TaskCanceledException(msg)

-            with ThreadPoolExecutor(max_workers=5) as executor:
-                thr = []
-                i = f
-                while i < t:
-                    cpn = self.get_component_obj(self.path[i])
-                    if cpn.component_name.lower() in ["begin", "userfillup"]:
-                        thr.append(executor.submit(cpn.invoke, inputs=kwargs.get("inputs", {})))
-                        i += 1
+            loop = asyncio.get_running_loop()
+            tasks = []
+            i = f
+            while i < t:
+                cpn = self.get_component_obj(self.path[i])
+                task_fn = None
+
+                if cpn.component_name.lower() in ["begin", "userfillup"]:
+                    task_fn = partial(cpn.invoke, inputs=kwargs.get("inputs", {}))
+                    i += 1
+                else:
+                    for _, ele in cpn.get_input_elements().items():
+                        if isinstance(ele, dict) and ele.get("_cpn_id") and ele.get("_cpn_id") not in self.path[:i] and self.path[0].lower().find("userfillup") < 0:
+                            self.path.pop(i)
+                            t -= 1
+                            break
                    else:
-                        for _, ele in cpn.get_input_elements().items():
-                            if isinstance(ele, dict) and ele.get("_cpn_id") and ele.get("_cpn_id") not in self.path[:i] and self.path[0].lower().find("userfillup") < 0:
-                                self.path.pop(i)
-                                t -= 1
-                                break
-                        else:
-                            thr.append(executor.submit(cpn.invoke, **cpn.get_input()))
-                            i += 1
-                for t in thr:
-                    t.result()
+                        task_fn = partial(cpn.invoke, **cpn.get_input())
+                        i += 1
+
+                if task_fn is None:
+                    continue
+
+                tasks.append(loop.run_in_executor(self._thread_pool, task_fn))
+
+            if tasks:
+                await asyncio.gather(*tasks)

        def _node_finished(cpn_obj):
            return decorate("node_finished",{
@ -453,7 +466,7 @@ class Canvas(Graph):
                    "component_type": self.get_component_type(self.path[i]),
                    "thoughts": self.get_component_thoughts(self.path[i])
                })
-            _run_batch(idx, to)
+            await _run_batch(idx, to)
            to = len(self.path)
            # post processing of components invocation
            for i in range(idx, to):
@ -462,16 +475,29 @@ class Canvas(Graph):
                if cpn_obj.component_name.lower() == "message":
                    if isinstance(cpn_obj.output("content"), partial):
                        _m = ""
-                        for m in cpn_obj.output("content")():
-                            if not m:
-                                continue
-                            if m == "<think>":
-                                yield decorate("message", {"content": "", "start_to_think": True})
-                            elif m == "</think>":
-                                yield decorate("message", {"content": "", "end_to_think": True})
-                            else:
-                                yield decorate("message", {"content": m})
-                                _m += m
+                        stream = cpn_obj.output("content")()
+                        if inspect.isasyncgen(stream):
+                            async for m in stream:
+                                if not m:
+                                    continue
+                                if m == "<think>":
+                                    yield decorate("message", {"content": "", "start_to_think": True})
+                                elif m == "</think>":
+                                    yield decorate("message", {"content": "", "end_to_think": True})
+                                else:
+                                    yield decorate("message", {"content": m})
+                                    _m += m
+                        else:
+                            for m in stream:
+                                if not m:
+                                    continue
+                                if m == "<think>":
+                                    yield decorate("message", {"content": "", "start_to_think": True})
+                                elif m == "</think>":
+                                    yield decorate("message", {"content": "", "end_to_think": True})
+                                else:
+                                    yield decorate("message", {"content": m})
+                                    _m += m
                        cpn_obj.set_output("content", _m)
                        cite = re.search(r"\[ID:[ 0-9]+\]", _m)
                    else:
@ -621,6 +647,31 @@ class Canvas(Graph):
    def get_component_input_elements(self, cpnnm):
        return self.components[cpnnm]["obj"].get_input_elements()

+    async def get_files_async(self, files: Union[None, list[dict]]) -> list[str]:
+        if not files:
+            return  []
+        def image_to_base64(file):
+            return "data:{};base64,{}".format(file["mime_type"],
+                                        base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
+        loop = asyncio.get_running_loop()
+        tasks = []
+        for file in files:
+            if file["mime_type"].find("image") >=0:
+                tasks.append(loop.run_in_executor(self._thread_pool, image_to_base64, file))
+                continue
+            tasks.append(loop.run_in_executor(self._thread_pool, FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
+        return await asyncio.gather(*tasks)
+
+    def get_files(self, files: Union[None, list[dict]]) -> list[str]:
+        """
+        Synchronous wrapper for get_files_async, used by sync component invoke paths.
+        """
+        loop = getattr(self, "_loop", None)
+        if loop and loop.is_running():
+            return asyncio.run_coroutine_threadsafe(self.get_files_async(files), loop).result()
+
+        return asyncio.run(self.get_files_async(files))
+
    def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
        agent_ids = agent_id.split("-->")
        agent_name = self.get_component_name(agent_ids[0])
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@ -205,6 +205,55 @@ class LLM(ComponentBase):
            for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs):
                yield delta(txt)

+    async def _stream_output_async(self, prompt, msg):
+        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
+        answer = ""
+        last_idx = 0
+        endswith_think = False
+
+        def delta(txt):
+            nonlocal answer, last_idx, endswith_think
+            delta_ans = txt[last_idx:]
+            answer = txt
+
+            if delta_ans.find("<think>") == 0:
+                last_idx += len("<think>")
+                return "<think>"
+            elif delta_ans.find("<think>") > 0:
+                delta_ans = txt[last_idx:last_idx + delta_ans.find("<think>")]
+                last_idx += delta_ans.find("<think>")
+                return delta_ans
+            elif delta_ans.endswith("</think>"):
+                endswith_think = True
+            elif endswith_think:
+                endswith_think = False
+                return "</think>"
+
+            last_idx = len(answer)
+            if answer.endswith("</think>"):
+                last_idx -= len("</think>")
+            return re.sub(r"(<think>|</think>)", "", delta_ans)
+
+        stream_kwargs = {"images": self.imgs} if self.imgs else {}
+        async for ans in self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **stream_kwargs):
+            if self.check_if_canceled("LLM streaming"):
+                return
+
+            if isinstance(ans, int):
+                continue
+
+            if ans.find("**ERROR**") >= 0:
+                if self.get_exception_default_value():
+                    self.set_output("content", self.get_exception_default_value())
+                    yield self.get_exception_default_value()
+                else:
+                    self.set_output("_ERROR", ans)
+                return
+
+            yield delta(ans)
+
+        self.set_output("content", answer)
+
    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        if self.check_if_canceled("LLM processing"):
@ -250,7 +299,7 @@ class LLM(ComponentBase):
        downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
        ex = self.exception_handler()
        if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]):
-            self.set_output("content", partial(self._stream_output, prompt, msg))
+            self.set_output("content", partial(self._stream_output_async, prompt, msg))
            return

        for _ in range(self._param.max_retries+1):
--- a/agent/component/message.py
+++ b/agent/component/message.py
@ -13,6 +13,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import asyncio
+import inspect
 import json
 import os
 import random
@ -66,8 +68,12 @@ class Message(ComponentBase):
                v = ""
            ans = ""
            if isinstance(v, partial):
-                for t in v():
-                    ans += t
+                iter_obj = v()
+                if inspect.isasyncgen(iter_obj):
+                    ans = asyncio.run(self._consume_async_gen(iter_obj))
+                else:
+                    for t in iter_obj:
+                        ans += t
            elif isinstance(v, list) and delimiter:
                ans = delimiter.join([str(vv) for vv in v])
            elif not isinstance(v, str):
@ -89,7 +95,13 @@ class Message(ComponentBase):
            _kwargs[_n] = v
        return script, _kwargs

-    def _stream(self, rand_cnt:str):
+    async def _consume_async_gen(self, agen):
+        buf = ""
+        async for t in agen:
+            buf += t
+        return buf
+
+    async def _stream(self, rand_cnt:str):
        s = 0
        all_content = ""
        cache = {}
@ -111,15 +123,27 @@ class Message(ComponentBase):
                v = ""
            if isinstance(v, partial):
                cnt = ""
-                for t in v():
-                    if self.check_if_canceled("Message streaming"):
-                        return
+                iter_obj = v()
+                if inspect.isasyncgen(iter_obj):
+                    async for t in iter_obj:
+                        if self.check_if_canceled("Message streaming"):
+                            return

-                    all_content += t
-                    cnt += t
-                    yield t
+                        all_content += t
+                        cnt += t
+                        yield t
+                else:
+                    for t in iter_obj:
+                        if self.check_if_canceled("Message streaming"):
+                            return
+
+                        all_content += t
+                        cnt += t
+                        yield t
                self.set_input_value(exp, cnt)
                continue
+            elif inspect.isawaitable(v):
+                v = await v
            elif not isinstance(v, str):
                try:
                    v = json.dumps(v, ensure_ascii=False)
@ -181,7 +205,7 @@ class Message(ComponentBase):

        import pypandoc
        doc_id = get_uuid()
-        
+
        if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx"}:
            self._param.output_format = "markdown"

@ -231,11 +255,11 @@ class Message(ComponentBase):

            settings.STORAGE_IMPL.put(self._canvas._tenant_id, doc_id, binary_content)
            self.set_output("attachment", {
-                "doc_id":doc_id, 
-                "format":self._param.output_format, 
+                "doc_id":doc_id,
+                "format":self._param.output_format,
                "file_name":f"{doc_id[:8]}.{self._param.output_format}"})

            logging.info(f"Converted content uploaded as {doc_id} (format={self._param.output_format})")

        except Exception as e:
-            logging.error(f"Error converting content to {self._param.output_format}: {e}")
+            logging.error(f"Error converting content to {self._param.output_format}: {e}")