Made task_executor async to speed up parsing (#5530)

### What problem does this PR solve?

Made `task_executor` async to speed up parsing. Blocking LLM chat calls are now offloaded to worker threads via `trio.to_thread.run_sync` and throttled by a shared `chat_limiter`, so document chunks are processed concurrently instead of one at a time.
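
Under the hood this relies on trio's structured concurrency. Below is a minimal, self-contained sketch of the pattern the diff applies; the names `blocking_chat` and `process_chunk` and the limit of 10 are illustrative assumptions, not code from this PR (in the PR the limiter is `graphrag.utils.chat_limiter`):

```python
# Minimal sketch of the concurrency pattern this PR applies; blocking_chat,
# process_chunk, and the limit of 10 are illustrative placeholders.
import trio

# Plays the role of graphrag.utils.chat_limiter: caps how many blocking
# LLM calls may occupy worker threads at once.
chat_limiter = trio.CapacityLimiter(10)

def blocking_chat(prompt: str) -> str:
    # Stand-in for a synchronous LLM client call such as self._chat(...).
    return f"response to {prompt!r}"

async def process_chunk(prompt: str, out_results: list) -> None:
    # Offload the blocking call to a worker thread so the trio scheduler
    # can keep other chunks moving; the limiter bounds total concurrency.
    async with chat_limiter:
        response = await trio.to_thread.run_sync(lambda: blocking_chat(prompt))
    out_results.append(response)

async def main() -> None:
    out_results: list = []
    async with trio.open_nursery() as nursery:
        for prompt in ("chunk 1", "chunk 2", "chunk 3"):
            nursery.start_soon(process_chunk, prompt, out_results)
    # The nursery only exits once every spawned task has finished.
    print(out_results)

if __name__ == "__main__":
    trio.run(main)
```

Tasks spawned with `nursery.start_soon` cannot return values directly, which is why `_process_single_content` below now appends its result to a shared `out_results` list instead of returning a tuple.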

### Type of change

- [x] Performance Improvement
Author: Zhichang Yu
Date: 2025-03-03 18:59:49 +08:00
Committed by: GitHub
Parent: abac2ca2c5
Commit: c813c1ff4c
22 changed files with 576 additions and 1005 deletions


@@ -5,15 +5,15 @@ Reference:
  - [graphrag](https://github.com/microsoft/graphrag)
 """
 import logging
 import re
 from typing import Any, Callable
 from dataclasses import dataclass
 import tiktoken
+import trio
 from graphrag.general.extractor import Extractor, ENTITY_EXTRACTION_MAX_GLEANINGS, DEFAULT_ENTITY_TYPES
 from graphrag.general.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
-from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
+from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter
 from rag.llm.chat_model import Base as CompletionLLM
 import networkx as nx
 from rag.utils import num_tokens_from_string
@@ -102,53 +102,47 @@ class GraphExtractor(Extractor):
             self._entity_types_key: ",".join(DEFAULT_ENTITY_TYPES),
         }
 
-    def _process_single_content(self,
-                                chunk_key_dp: tuple[str, str]
-                                ):
+    async def _process_single_content(self, chunk_key_dp: tuple[str, str], chunk_seq: int, num_chunks: int, out_results):
         token_count = 0
         chunk_key = chunk_key_dp[0]
         content = chunk_key_dp[1]
         variables = {
             **self._prompt_variables,
             self._input_text_key: content,
         }
-        try:
-            gen_conf = {"temperature": 0.3}
-            hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables)
-            response = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)
-            token_count += num_tokens_from_string(hint_prompt + response)
-            results = response or ""
-            history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
-            # Repeat to ensure we maximize entity count
-            for i in range(self._max_gleanings):
-                text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
-                history.append({"role": "user", "content": text})
-                response = self._chat("", history, gen_conf)
-                token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
-                results += response or ""
-                # if this is the final glean, don't bother updating the continuation flag
-                if i >= self._max_gleanings - 1:
-                    break
-                history.append({"role": "assistant", "content": response})
-                history.append({"role": "user", "content": LOOP_PROMPT})
-                continuation = self._chat("", history, {"temperature": 0.8})
-                token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
-                if continuation != "YES":
-                    break
-            record_delimiter = variables.get(self._record_delimiter_key, DEFAULT_RECORD_DELIMITER)
-            tuple_delimiter = variables.get(self._tuple_delimiter_key, DEFAULT_TUPLE_DELIMITER)
-            records = [re.sub(r"^\(|\)$", "", r.strip()) for r in results.split(record_delimiter)]
-            records = [r for r in records if r.strip()]
-            maybe_nodes, maybe_edges = self._entities_and_relations(chunk_key, records, tuple_delimiter)
-            return maybe_nodes, maybe_edges, token_count
-        except Exception as e:
-            logging.exception("error extracting graph")
-            return e, None, None
+        gen_conf = {"temperature": 0.3}
+        hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables)
+        async with chat_limiter:
+            response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf))
+        token_count += num_tokens_from_string(hint_prompt + response)
+        results = response or ""
+        history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
+        # Repeat to ensure we maximize entity count
+        for i in range(self._max_gleanings):
+            text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
+            history.append({"role": "user", "content": text})
+            async with chat_limiter:
+                response = await trio.to_thread.run_sync(lambda: self._chat("", history, gen_conf))
+            token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
+            results += response or ""
+            # if this is the final glean, don't bother updating the continuation flag
+            if i >= self._max_gleanings - 1:
+                break
+            history.append({"role": "assistant", "content": response})
+            history.append({"role": "user", "content": LOOP_PROMPT})
+            async with chat_limiter:
+                continuation = await trio.to_thread.run_sync(lambda: self._chat("", history, {"temperature": 0.8}))
+            token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
+            if continuation != "YES":
+                break
+        record_delimiter = variables.get(self._record_delimiter_key, DEFAULT_RECORD_DELIMITER)
+        tuple_delimiter = variables.get(self._tuple_delimiter_key, DEFAULT_TUPLE_DELIMITER)
+        records = [re.sub(r"^\(|\)$", "", r.strip()) for r in results.split(record_delimiter)]
+        records = [r for r in records if r.strip()]
+        maybe_nodes, maybe_edges = self._entities_and_relations(chunk_key, records, tuple_delimiter)
+        out_results.append((maybe_nodes, maybe_edges, token_count))
+        if self.callback:
+            self.callback(0.5+0.1*len(out_results)/num_chunks, msg = f"Entities extraction of chunk {chunk_seq} {len(out_results)}/{num_chunks} done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {token_count} tokens.")
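
The new signature takes `chunk_seq`, `num_chunks`, and a shared `out_results` list, which fits being spawned as one task per chunk from a trio nursery. A hypothetical call site, assumed for illustration (the real caller lives elsewhere in this PR and may differ):

```python
# Hypothetical caller (assumption, not part of this hunk): fans every chunk
# out as a concurrent task, then gathers the appended
# (maybe_nodes, maybe_edges, token_count) tuples once all tasks finish.
import trio

async def extract_all(extractor, chunks):
    out_results = []
    async with trio.open_nursery() as nursery:
        for seq, chunk_key_dp in enumerate(chunks):
            nursery.start_soon(extractor._process_single_content,
                               chunk_key_dp, seq, len(chunks), out_results)
    # Reaching this line means every per-chunk task has completed.
    return out_results
```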