Made task_executor async to speed up parsing (#5530)

### What problem does this PR solve?

Made `task_executor` async to speed up parsing. The GraphRAG extractors drop their per-stage `ThreadPoolExecutor` pools and instead fan chunks out as `trio` tasks, with blocking LLM calls pushed to worker threads and throttled by the shared `chat_limiter` (see the sketch after the checklist below).

### Type of change

- [x] Performance Improvement
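
### How it works

A minimal sketch of the concurrency pattern this commit applies across the GraphRAG extractors; `chat` and `chat_limiter` below are stand-ins (the real code wraps `Base.chat` from `rag.llm.chat_model` and imports the shared limiter from `graphrag.utils`):

```python
import trio

# stand-in for graphrag.utils.chat_limiter: caps how many LLM calls run at once
chat_limiter = trio.CapacityLimiter(10)

def chat(prompt: str) -> str:
    # stand-in for the blocking LLM call
    return f"echo: {prompt}"

async def process_chunk(chunk: str, out_results: list[str]) -> None:
    # push the blocking call onto a worker thread so the trio event loop stays free
    async with chat_limiter:
        response = await trio.to_thread.run_sync(lambda: chat(chunk))
    # nursery tasks cannot return values, so results go into a shared list
    out_results.append(response)

async def extract_all(chunks: list[str]) -> list[str]:
    out_results: list[str] = []
    async with trio.open_nursery() as nursery:  # one concurrent task per chunk
        for ck in chunks:
            nursery.start_soon(process_chunk, ck, out_results)
    return out_results  # leaving the nursery waits for every task

if __name__ == "__main__":
    print(trio.run(extract_all, ["chunk a", "chunk b"]))
```

Each extractor's `ThreadPoolExecutor` fan-out is replaced with this nursery pattern, which is why the `__call__` methods became `async` and callers now drive them with `trio.run`.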
Zhichang Yu
2025-03-03 18:59:49 +08:00
committed by GitHub
parent abac2ca2c5
commit c813c1ff4c
22 changed files with 576 additions and 1005 deletions


@@ -1,268 +0,0 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""
Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import argparse
import json
import re
import traceback
from dataclasses import dataclass
from typing import Any
import tiktoken
from graphrag.general.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from graphrag.general.extractor import Extractor
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
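# The delimiters below structure the tuple-format model output; "gleanings"
# are follow-up rounds that ask the model for claims it missed.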
DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"
DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>"
CLAIM_MAX_GLEANINGS = 1
@dataclass
class ClaimExtractorResult:
"""Claim extractor result class definition."""
output: list[dict]
source_docs: dict[str, Any]
class ClaimExtractor(Extractor):
"""Claim extractor class definition."""
_extraction_prompt: str
_summary_prompt: str
_output_formatter_prompt: str
_input_text_key: str
_input_entity_spec_key: str
_input_claim_description_key: str
_tuple_delimiter_key: str
_record_delimiter_key: str
_completion_delimiter_key: str
_max_gleanings: int
_on_error: ErrorHandlerFn
def __init__(
self,
llm_invoker: CompletionLLM,
extraction_prompt: str | None = None,
input_text_key: str | None = None,
input_entity_spec_key: str | None = None,
input_claim_description_key: str | None = None,
input_resolved_entities_key: str | None = None,
tuple_delimiter_key: str | None = None,
record_delimiter_key: str | None = None,
completion_delimiter_key: str | None = None,
encoding_model: str | None = None,
max_gleanings: int | None = None,
on_error: ErrorHandlerFn | None = None,
):
"""Init method definition."""
self._llm = llm_invoker
self._extraction_prompt = extraction_prompt or CLAIM_EXTRACTION_PROMPT
self._input_text_key = input_text_key or "input_text"
self._input_entity_spec_key = input_entity_spec_key or "entity_specs"
self._tuple_delimiter_key = tuple_delimiter_key or "tuple_delimiter"
self._record_delimiter_key = record_delimiter_key or "record_delimiter"
self._completion_delimiter_key = (
completion_delimiter_key or "completion_delimiter"
)
self._input_claim_description_key = (
input_claim_description_key or "claim_description"
)
self._input_resolved_entities_key = (
input_resolved_entities_key or "resolved_entities"
)
self._max_gleanings = (
max_gleanings if max_gleanings is not None else CLAIM_MAX_GLEANINGS
)
self._on_error = on_error or (lambda _e, _s, _d: None)
# Construct the looping arguments
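# Bias the YES/NO token ids and allow a single output token so the gleaning
# continuation check returns a bare "YES" or "NO".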
encoding = tiktoken.get_encoding(encoding_model or "cl100k_base")
yes = encoding.encode("YES")
no = encoding.encode("NO")
self._loop_args = {"logit_bias": {yes[0]: 100, no[0]: 100}, "max_tokens": 1}
def __call__(
self, inputs: dict[str, Any], prompt_variables: dict | None = None
) -> ClaimExtractorResult:
"""Call method definition."""
if prompt_variables is None:
prompt_variables = {}
texts = inputs[self._input_text_key]
entity_spec = str(inputs[self._input_entity_spec_key])
claim_description = inputs[self._input_claim_description_key]
resolved_entities = inputs.get(self._input_resolved_entities_key, {})
source_doc_map = {}
prompt_args = {
self._input_entity_spec_key: entity_spec,
self._input_claim_description_key: claim_description,
self._tuple_delimiter_key: prompt_variables.get(self._tuple_delimiter_key)
or DEFAULT_TUPLE_DELIMITER,
self._record_delimiter_key: prompt_variables.get(self._record_delimiter_key)
or DEFAULT_RECORD_DELIMITER,
self._completion_delimiter_key: prompt_variables.get(
self._completion_delimiter_key
)
or DEFAULT_COMPLETION_DELIMITER,
}
all_claims: list[dict] = []
for doc_index, text in enumerate(texts):
document_id = f"d{doc_index}"
try:
claims = self._process_document(prompt_args, text, doc_index)
all_claims += [
self._clean_claim(c, document_id, resolved_entities) for c in claims
]
source_doc_map[document_id] = text
except Exception as e:
logging.exception("error extracting claim")
self._on_error(
e,
traceback.format_exc(),
{"doc_index": doc_index, "text": text},
)
continue
return ClaimExtractorResult(
output=all_claims,
source_docs=source_doc_map,
)
def _clean_claim(
self, claim: dict, document_id: str, resolved_entities: dict
) -> dict:
# clean the parsed claims to remove any claims with status = False
obj = claim.get("object_id", claim.get("object"))
subject = claim.get("subject_id", claim.get("subject"))
# If subject or object in resolved entities, then replace with resolved entity
obj = resolved_entities.get(obj, obj)
subject = resolved_entities.get(subject, subject)
claim["object_id"] = obj
claim["subject_id"] = subject
claim["doc_id"] = document_id
return claim
def _process_document(
self, prompt_args: dict, doc, doc_index: int
) -> list[dict]:
record_delimiter = prompt_args.get(
self._record_delimiter_key, DEFAULT_RECORD_DELIMITER
)
completion_delimiter = prompt_args.get(
self._completion_delimiter_key, DEFAULT_COMPLETION_DELIMITER
)
variables = {
self._input_text_key: doc,
**prompt_args,
}
text = perform_variable_replacements(self._extraction_prompt, variables=variables)
gen_conf = {"temperature": 0.5}
results = self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
claims = results.strip().removesuffix(completion_delimiter)
history = [{"role": "system", "content": text}, {"role": "assistant", "content": results}]
# Repeat to ensure we maximize entity count
for i in range(self._max_gleanings):
text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
history.append({"role": "user", "content": text})
extension = self._chat("", history, gen_conf)
claims += record_delimiter + extension.strip().removesuffix(
completion_delimiter
)
# If this isn't the last loop, check to see if we should continue
if i >= self._max_gleanings - 1:
break
history.append({"role": "assistant", "content": extension})
history.append({"role": "user", "content": LOOP_PROMPT})
continuation = self._chat("", history, self._loop_args)
if continuation != "YES":
break
result = self._parse_claim_tuples(claims, prompt_args)
for r in result:
r["doc_id"] = f"{doc_index}"
return result
def _parse_claim_tuples(
self, claims: str, prompt_variables: dict
) -> list[dict[str, Any]]:
"""Parse claim tuples."""
record_delimiter = prompt_variables.get(
self._record_delimiter_key, DEFAULT_RECORD_DELIMITER
)
completion_delimiter = prompt_variables.get(
self._completion_delimiter_key, DEFAULT_COMPLETION_DELIMITER
)
tuple_delimiter = prompt_variables.get(
self._tuple_delimiter_key, DEFAULT_TUPLE_DELIMITER
)
def pull_field(index: int, fields: list[str]) -> str | None:
return fields[index].strip() if len(fields) > index else None
result: list[dict[str, Any]] = []
claims_values = (
claims.strip().removesuffix(completion_delimiter).split(record_delimiter)
)
for claim in claims_values:
claim = claim.strip().removeprefix("(").removesuffix(")")
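# drop anything up to an echoed "Output:" label the model may repeat from the prompt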
claim = re.sub(r".*Output:", "", claim)
# Ignore the completion delimiter
if claim == completion_delimiter:
continue
claim_fields = claim.split(tuple_delimiter)
o = {
"subject_id": pull_field(0, claim_fields),
"object_id": pull_field(1, claim_fields),
"type": pull_field(2, claim_fields),
"status": pull_field(3, claim_fields),
"start_date": pull_field(4, claim_fields),
"end_date": pull_field(5, claim_fields),
"description": pull_field(6, claim_fields),
"source_text": pull_field(7, claim_fields),
"doc_id": pull_field(8, claim_fields),
}
# skip incomplete tuples; the or-chain short-circuits before calling .lower() on None
if not o["subject_id"] or not o["object_id"] or o["subject_id"].lower() == "none" or o["object_id"].lower() == "none":
continue
result.append(o)
return result
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--tenant_id', default=False, help="Tenant ID", action='store', required=True)
parser.add_argument('-d', '--doc_id', default=False, help="Document ID", action='store', required=True)
args = parser.parse_args()
from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from api import settings
from api.db.services.knowledgebase_service import KnowledgebaseService
kb_ids = KnowledgebaseService.get_kb_ids(args.tenant_id)
ex = ClaimExtractor(LLMBundle(args.tenant_id, LLMType.CHAT))
docs = [d["content_with_weight"] for d in settings.retrievaler.chunk_list(args.doc_id, args.tenant_id, kb_ids, max_count=12, fields=["content_with_weight"])]
info = {
"input_text": docs,
"entity_specs": "organization, person",
"claim_description": ""
}
claim = ex(info)
logging.info(json.dumps(claim.output, ensure_ascii=False, indent=2))


@@ -1,71 +0,0 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""
Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
CLAIM_EXTRACTION_PROMPT = """
################
-Target activity-
################
You are an intelligent assistant that helps a human analyst to analyze claims against certain entities presented in a text document.
################
-Goal-
################
Given a text document that is potentially relevant to this activity, an entity specification, and a claim description, extract all entities that match the entity specification and all claims against those entities.
################
-Steps-
################
- 1. Extract all named entities that match the predefined entity specification. Entity specification can either be a list of entity names or a list of entity types.
- 2. For each entity identified in step 1, extract all claims associated with the entity. Claims need to match the specified claim description, and the entity should be the subject of the claim.
For each claim, extract the following information:
- Subject: name of the entity that is subject of the claim, capitalized. The subject entity is one that committed the action described in the claim. Subject needs to be one of the named entities identified in step 1.
- Object: name of the entity that is object of the claim, capitalized. The object entity is one that either reports/handles or is affected by the action described in the claim. If object entity is unknown, use **NONE**.
- Claim Type: overall category of the claim, capitalized. Name it in a way that can be repeated across multiple text inputs, so that similar claims share the same claim type
- Claim Status: **TRUE**, **FALSE**, or **SUSPECTED**. TRUE means the claim is confirmed, FALSE means the claim is found to be False, SUSPECTED means the claim is not verified.
- Claim Description: Detailed description explaining the reasoning behind the claim, together with all the related evidence and references.
- Claim Date: Period (start_date, end_date) when the claim was made. Both start_date and end_date should be in ISO-8601 format. If the claim was made on a single date rather than a date range, set the same date for both start_date and end_date. If date is unknown, return **NONE**.
- Claim Source Text: List of **all** quotes from the original text that are relevant to the claim.
- 3. Format each claim as (<subject_entity>{tuple_delimiter}<object_entity>{tuple_delimiter}<claim_type>{tuple_delimiter}<claim_status>{tuple_delimiter}<claim_start_date>{tuple_delimiter}<claim_end_date>{tuple_delimiter}<claim_description>{tuple_delimiter}<claim_source>)
- 4. Return output in language of the 'Text' as a single list of all the claims identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
- 5. If nothing satisfies the above requirements, keep the output empty.
- 6. When finished, output {completion_delimiter}
################
-Examples-
################
Example 1:
Entity specification: organization
Claim description: red flags associated with an entity
Text: According to an article on 2022/01/10, Company A was fined for bid rigging while participating in multiple public tenders published by Government Agency B. The company is owned by Person C who was suspected of engaging in corruption activities in 2015.
Output:
(COMPANY A{tuple_delimiter}GOVERNMENT AGENCY B{tuple_delimiter}ANTI-COMPETITIVE PRACTICES{tuple_delimiter}TRUE{tuple_delimiter}2022-01-10T00:00:00{tuple_delimiter}2022-01-10T00:00:00{tuple_delimiter}Company A was found to engage in anti-competitive practices because it was fined for bid rigging in multiple public tenders published by Government Agency B according to an article published on 2022/01/10{tuple_delimiter}According to an article published on 2022/01/10, Company A was fined for bid rigging while participating in multiple public tenders published by Government Agency B.)
{completion_delimiter}
###########################
Example 2:
Entity specification: Company A, Person C
Claim description: red flags associated with an entity
Text: According to an article on 2022/01/10, Company A was fined for bid rigging while participating in multiple public tenders published by Government Agency B. The company is owned by Person C who was suspected of engaging in corruption activities in 2015.
Output:
(COMPANY A{tuple_delimiter}GOVERNMENT AGENCY B{tuple_delimiter}ANTI-COMPETITIVE PRACTICES{tuple_delimiter}TRUE{tuple_delimiter}2022-01-10T00:00:00{tuple_delimiter}2022-01-10T00:00:00{tuple_delimiter}Company A was found to engage in anti-competitive practices because it was fined for bid rigging in multiple public tenders published by Government Agency B according to an article published on 2022/01/10{tuple_delimiter}According to an article published on 2022/01/10, Company A was fined for bid rigging while participating in multiple public tenders published by Government Agency B.)
{record_delimiter}
(PERSON C{tuple_delimiter}NONE{tuple_delimiter}CORRUPTION{tuple_delimiter}SUSPECTED{tuple_delimiter}2015-01-01T00:00:00{tuple_delimiter}2015-12-30T00:00:00{tuple_delimiter}Person C was suspected of engaging in corruption activities in 2015{tuple_delimiter}The company is owned by Person C who was suspected of engaging in corruption activities in 2015)
{completion_delimiter}
################
-Real Data-
################
Use the following input for your answer.
Entity specification: {entity_specs}
Claim description: {claim_description}
Text: {input_text}
Output:"""
CONTINUE_PROMPT = "MANY entities were missed in the last extraction. Add them below using the same format (see 'Steps'; start with 'Output').\nOutput: "
LOOP_PROMPT = "It appears some entities may have still been missed. Answer YES {tuple_delimiter} NO if there are still entities that need to be added.\n"


@@ -17,9 +17,10 @@ from graphrag.general.community_report_prompt import COMMUNITY_REPORT_PROMPT
from graphrag.general.extractor import Extractor
from graphrag.general.leiden import add_community_info2graph
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import perform_variable_replacements, dict_has_keys_with_types
from graphrag.utils import perform_variable_replacements, dict_has_keys_with_types, chat_limiter
from rag.utils import num_tokens_from_string
from timeit import default_timer as timer
import trio
@dataclass
@@ -52,7 +53,7 @@ class CommunityReportsExtractor(Extractor):
self._extraction_prompt = COMMUNITY_REPORT_PROMPT
self._max_report_length = max_report_length or 1500
def __call__(self, graph: nx.Graph, callback: Callable | None = None):
async def __call__(self, graph: nx.Graph, callback: Callable | None = None):
for node_degree in graph.degree:
graph.nodes[str(node_degree[0])]["rank"] = int(node_degree[1])
@@ -86,28 +87,25 @@ class CommunityReportsExtractor(Extractor):
}
text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
gen_conf = {"temperature": 0.3}
try:
response = self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
token_count += num_tokens_from_string(text + response)
response = re.sub(r"^[^\{]*", "", response)
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
logging.debug(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
("summary", str),
("findings", list),
("rating", float),
("rating_explanation", str),
]):
continue
response["weight"] = weight
response["entities"] = ents
except Exception:
logging.exception("CommunityReportsExtractor got exception")
async with chat_limiter:
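# run the blocking chat() on a worker thread; the shared limiter caps the
# number of in-flight LLM calls across all tasks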
response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf))
token_count += num_tokens_from_string(text + response)
response = re.sub(r"^[^\{]*", "", response)
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
logging.debug(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
("summary", str),
("findings", list),
("rating", float),
("rating_explanation", str),
]):
continue
response["weight"] = weight
response["entities"] = ents
add_community_info2graph(graph, ents, response["title"])
res_str.append(self._get_text_output(response))


@@ -14,16 +14,15 @@
# limitations under the License.
#
import logging
import os
import re
from collections import defaultdict, Counter
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from typing import Callable
import trio
from graphrag.general.graph_prompt import SUMMARIZE_DESCRIPTIONS_PROMPT
from graphrag.utils import get_llm_cache, set_llm_cache, handle_single_entity_extraction, \
handle_single_relationship_extraction, split_string_by_multi_markers, flat_uniq_list
handle_single_relationship_extraction, split_string_by_multi_markers, flat_uniq_list, chat_limiter
from rag.llm.chat_model import Base as CompletionLLM
from rag.utils import truncate
@@ -91,54 +90,50 @@ class Extractor:
)
return dict(maybe_nodes), dict(maybe_edges)
def __call__(
async def __call__(
self, chunks: list[tuple[str, str]],
callback: Callable | None = None
):
results = []
max_workers = int(os.environ.get('GRAPH_EXTRACTOR_MAX_WORKERS', 10))
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
self.callback = callback
start_ts = trio.current_time()
out_results = []
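# nursery tasks cannot return values, so each task appends its
# (maybe_nodes, maybe_edges, token_count) tuple into this shared list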
async with trio.open_nursery() as nursery:
for i, (cid, ck) in enumerate(chunks):
ck = truncate(ck, int(self._llm.max_length*0.8))
threads.append(
exe.submit(self._process_single_content, (cid, ck)))
for i, _ in enumerate(threads):
n, r, tc = _.result()
if not isinstance(n, Exception):
results.append((n, r))
if callback:
callback(0.5 + 0.1 * i / len(threads), f"Entities extraction progress ... {i + 1}/{len(threads)} ({tc} tokens)")
elif callback:
callback(msg="Knowledge graph extraction error:{}".format(str(n)))
nursery.start_soon(self._process_single_content, (cid, ck), i, len(chunks), out_results)
maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list)
for m_nodes, m_edges in results:
sum_token_count = 0
for m_nodes, m_edges, token_count in out_results:
for k, v in m_nodes.items():
maybe_nodes[k].extend(v)
for k, v in m_edges.items():
maybe_edges[tuple(sorted(k))].extend(v)
logging.info("Inserting entities into storage...")
sum_token_count += token_count
now = trio.current_time()
if callback:
callback(msg = f"Entities and relationships extraction done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {sum_token_count} tokens, {now-start_ts:.2f}s.")
start_ts = now
logging.info("Entities merging...")
all_entities_data = []
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
async with trio.open_nursery() as nursery:
for en_nm, ents in maybe_nodes.items():
threads.append(
exe.submit(self._merge_nodes, en_nm, ents))
for t in threads:
n = t.result()
if not isinstance(n, Exception):
all_entities_data.append(n)
elif callback:
callback(msg="Knowledge graph nodes merging error: {}".format(str(n)))
nursery.start_soon(self._merge_nodes, en_nm, ents, all_entities_data)
now = trio.current_time()
if callback:
callback(msg = f"Entities merging done, {now-start_ts:.2f}s.")
logging.info("Inserting relationships into storage...")
start_ts = now
logging.info("Relationships merging...")
all_relationships_data = []
for (src, tgt), rels in maybe_edges.items():
all_relationships_data.append(self._merge_edges(src, tgt, rels))
async with trio.open_nursery() as nursery:
for (src, tgt), rels in maybe_edges.items():
nursery.start_soon(self._merge_edges, src, tgt, rels, all_relationships_data)
now = trio.current_time()
if callback:
callback(msg = f"Relationships merging done, {now-start_ts:.2f}s.")
if not len(all_entities_data) and not len(all_relationships_data):
logging.warning(
@@ -152,7 +147,7 @@ class Extractor:
return all_entities_data, all_relationships_data
def _merge_nodes(self, entity_name: str, entities: list[dict]):
async def _merge_nodes(self, entity_name: str, entities: list[dict], all_entities_data):
if not entities:
return
already_entity_types = []
@@ -176,26 +171,22 @@
sorted(set([dp["description"] for dp in entities] + already_description))
)
already_source_ids = flat_uniq_list(entities, "source_id")
try:
description = self._handle_entity_relation_summary(
entity_name, description
)
node_data = dict(
entity_type=entity_type,
description=description,
source_id=already_source_ids,
)
node_data["entity_name"] = entity_name
self._set_entity_(entity_name, node_data)
return node_data
except Exception as e:
return e
description = await self._handle_entity_relation_summary(entity_name, description)
node_data = dict(
entity_type=entity_type,
description=description,
source_id=already_source_ids,
)
node_data["entity_name"] = entity_name
self._set_entity_(entity_name, node_data)
all_entities_data.append(node_data)
def _merge_edges(
async def _merge_edges(
self,
src_id: str,
tgt_id: str,
edges_data: list[dict]
edges_data: list[dict],
all_relationships_data
):
if not edges_data:
return
@@ -226,7 +217,7 @@
"description": description,
"entity_type": 'UNKNOWN'
})
description = self._handle_entity_relation_summary(
description = await self._handle_entity_relation_summary(
f"({src_id}, {tgt_id})", description
)
edge_data = dict(
@@ -238,10 +229,9 @@
source_id=source_id
)
self._set_relation_(src_id, tgt_id, edge_data)
all_relationships_data.append(edge_data)
return edge_data
def _handle_entity_relation_summary(
async def _handle_entity_relation_summary(
self,
entity_or_relation_name: str,
description: str
@@ -256,5 +246,6 @@
)
use_prompt = prompt_template.format(**context_base)
logging.info(f"Trigger summary: {entity_or_relation_name}")
summary = self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8})
async with chat_limiter:
summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8}))
return summary


@@ -5,15 +5,15 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import re
from typing import Any, Callable
from dataclasses import dataclass
import tiktoken
import trio
from graphrag.general.extractor import Extractor, ENTITY_EXTRACTION_MAX_GLEANINGS, DEFAULT_ENTITY_TYPES
from graphrag.general.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter
from rag.llm.chat_model import Base as CompletionLLM
import networkx as nx
from rag.utils import num_tokens_from_string
@@ -102,53 +102,47 @@ class GraphExtractor(Extractor):
self._entity_types_key: ",".join(DEFAULT_ENTITY_TYPES),
}
def _process_single_content(self,
chunk_key_dp: tuple[str, str]
):
async def _process_single_content(self, chunk_key_dp: tuple[str, str], chunk_seq: int, num_chunks: int, out_results):
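# Runs as a trio nursery task: it appends its result to out_results and reports
# per-chunk progress through self.callback instead of returning a value.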
token_count = 0
chunk_key = chunk_key_dp[0]
content = chunk_key_dp[1]
variables = {
**self._prompt_variables,
self._input_text_key: content,
}
try:
gen_conf = {"temperature": 0.3}
hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables)
response = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)
token_count += num_tokens_from_string(hint_prompt + response)
results = response or ""
history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
# Repeat to ensure we maximize entity count
for i in range(self._max_gleanings):
text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
history.append({"role": "user", "content": text})
response = self._chat("", history, gen_conf)
token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
results += response or ""
# if this is the final glean, don't bother updating the continuation flag
if i >= self._max_gleanings - 1:
break
history.append({"role": "assistant", "content": response})
history.append({"role": "user", "content": LOOP_PROMPT})
continuation = self._chat("", history, {"temperature": 0.8})
token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
if continuation != "YES":
break
record_delimiter = variables.get(self._record_delimiter_key, DEFAULT_RECORD_DELIMITER)
tuple_delimiter = variables.get(self._tuple_delimiter_key, DEFAULT_TUPLE_DELIMITER)
records = [re.sub(r"^\(|\)$", "", r.strip()) for r in results.split(record_delimiter)]
records = [r for r in records if r.strip()]
maybe_nodes, maybe_edges = self._entities_and_relations(chunk_key, records, tuple_delimiter)
return maybe_nodes, maybe_edges, token_count
except Exception as e:
logging.exception("error extracting graph")
return e, None, None
gen_conf = {"temperature": 0.3}
hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables)
async with chat_limiter:
response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf))
token_count += num_tokens_from_string(hint_prompt + response)
results = response or ""
history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
# Repeat to ensure we maximize entity count
for i in range(self._max_gleanings):
text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
history.append({"role": "user", "content": text})
async with chat_limiter:
response = await trio.to_thread.run_sync(lambda: self._chat("", history, gen_conf))
token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
results += response or ""
# if this is the final glean, don't bother updating the continuation flag
if i >= self._max_gleanings - 1:
break
history.append({"role": "assistant", "content": response})
history.append({"role": "user", "content": LOOP_PROMPT})
async with chat_limiter:
continuation = await trio.to_thread.run_sync(lambda: self._chat("", history, {"temperature": 0.8}))
token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
if continuation != "YES":
break
record_delimiter = variables.get(self._record_delimiter_key, DEFAULT_RECORD_DELIMITER)
tuple_delimiter = variables.get(self._tuple_delimiter_key, DEFAULT_TUPLE_DELIMITER)
records = [re.sub(r"^\(|\)$", "", r.strip()) for r in results.split(record_delimiter)]
records = [r for r in records if r.strip()]
maybe_nodes, maybe_edges = self._entities_and_relations(chunk_key, records, tuple_delimiter)
out_results.append((maybe_nodes, maybe_edges, token_count))
if self.callback:
self.callback(0.5+0.1*len(out_results)/num_chunks, msg = f"Entities extraction of chunk {chunk_seq} {len(out_results)}/{num_chunks} done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {token_count} tokens.")


@@ -17,6 +17,7 @@ import argparse
import logging
from functools import reduce, partial
import networkx as nx
import trio
from api import settings
from graphrag.general.community_reports_extractor import CommunityReportsExtractor
@@ -41,18 +42,24 @@ class Dealer:
embed_bdl=None,
callback=None
):
docids = list(set([docid for docid,_ in chunks]))
self.tenant_id = tenant_id
self.kb_id = kb_id
self.chunks = chunks
self.llm_bdl = llm_bdl
self.embed_bdl = embed_bdl
ext = extractor(self.llm_bdl, language=language,
self.ext = extractor(self.llm_bdl, language=language,
entity_types=entity_types,
get_entity=partial(get_entity, tenant_id, kb_id),
set_entity=partial(set_entity, tenant_id, kb_id, self.embed_bdl),
get_relation=partial(get_relation, tenant_id, kb_id),
set_relation=partial(set_relation, tenant_id, kb_id, self.embed_bdl)
)
ents, rels = ext(chunks, callback)
self.graph = nx.Graph()
self.callback = callback
async def __call__(self):
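# graph construction now happens here, under trio.run, instead of in __init__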
docids = list(set([docid for docid, _ in self.chunks]))
ents, rels = await self.ext(self.chunks, self.callback)
for en in ents:
self.graph.add_node(en["entity_name"], entity_type=en["entity_type"])#, description=en["description"])
@@ -64,16 +71,16 @@ class Dealer:
#description=rel["description"]
)
with RedisDistributedLock(kb_id, 60*60):
old_graph, old_doc_ids = get_graph(tenant_id, kb_id)
with RedisDistributedLock(self.kb_id, 60*60):
old_graph, old_doc_ids = get_graph(self.tenant_id, self.kb_id)
if old_graph is not None:
logging.info("Merge with an exiting graph...................")
self.graph = reduce(graph_merge, [old_graph, self.graph])
update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, self.graph, 2)
update_nodes_pagerank_nhop_neighbour(self.tenant_id, self.kb_id, self.graph, 2)
if old_doc_ids:
docids.extend(old_doc_ids)
docids = list(set(docids))
set_graph(tenant_id, kb_id, self.graph, docids)
set_graph(self.tenant_id, self.kb_id, self.graph, docids)
class WithResolution(Dealer):
@@ -84,47 +91,50 @@ class WithResolution(Dealer):
embed_bdl=None,
callback=None
):
self.tenant_id = tenant_id
self.kb_id = kb_id
self.llm_bdl = llm_bdl
self.embed_bdl = embed_bdl
with RedisDistributedLock(kb_id, 60*60):
self.graph, doc_ids = get_graph(tenant_id, kb_id)
self.callback = callback
async def __call__(self):
with RedisDistributedLock(self.kb_id, 60*60):
self.graph, doc_ids = await trio.to_thread.run_sync(lambda: get_graph(self.tenant_id, self.kb_id))
if not self.graph:
logging.error(f"Faild to fetch the graph. tenant_id:{kb_id}, kb_id:{kb_id}")
if callback:
callback(-1, msg="Faild to fetch the graph.")
logging.error(f"Faild to fetch the graph. tenant_id:{self.kb_id}, kb_id:{self.kb_id}")
if self.callback:
self.callback(-1, msg="Faild to fetch the graph.")
return
if callback:
callback(msg="Fetch the existing graph.")
if self.callback:
self.callback(msg="Fetch the existing graph.")
er = EntityResolution(self.llm_bdl,
get_entity=partial(get_entity, tenant_id, kb_id),
set_entity=partial(set_entity, tenant_id, kb_id, self.embed_bdl),
get_relation=partial(get_relation, tenant_id, kb_id),
set_relation=partial(set_relation, tenant_id, kb_id, self.embed_bdl))
reso = er(self.graph)
get_entity=partial(get_entity, self.tenant_id, self.kb_id),
set_entity=partial(set_entity, self.tenant_id, self.kb_id, self.embed_bdl),
get_relation=partial(get_relation, self.tenant_id, self.kb_id),
set_relation=partial(set_relation, self.tenant_id, self.kb_id, self.embed_bdl))
reso = await er(self.graph)
self.graph = reso.graph
logging.info("Graph resolution is done. Remove {} nodes.".format(len(reso.removed_entities)))
if callback:
callback(msg="Graph resolution is done. Remove {} nodes.".format(len(reso.removed_entities)))
update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, self.graph, 2)
set_graph(tenant_id, kb_id, self.graph, doc_ids)
if self.callback:
self.callback(msg="Graph resolution is done. Remove {} nodes.".format(len(reso.removed_entities)))
await trio.to_thread.run_sync(lambda: update_nodes_pagerank_nhop_neighbour(self.tenant_id, self.kb_id, self.graph, 2))
await trio.to_thread.run_sync(lambda: set_graph(self.tenant_id, self.kb_id, self.graph, doc_ids))
settings.docStoreConn.delete({
await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({
"knowledge_graph_kwd": "relation",
"kb_id": kb_id,
"kb_id": self.kb_id,
"from_entity_kwd": reso.removed_entities
}, search.index_name(tenant_id), kb_id)
settings.docStoreConn.delete({
}, search.index_name(self.tenant_id), self.kb_id))
await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({
"knowledge_graph_kwd": "relation",
"kb_id": kb_id,
"kb_id": self.kb_id,
"to_entity_kwd": reso.removed_entities
}, search.index_name(tenant_id), kb_id)
settings.docStoreConn.delete({
}, search.index_name(self.tenant_id), self.kb_id))
await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({
"knowledge_graph_kwd": "entity",
"kb_id": kb_id,
"kb_id": self.kb_id,
"entity_kwd": reso.removed_entities
}, search.index_name(tenant_id), kb_id)
}, search.index_name(self.tenant_id), self.kb_id))
class WithCommunity(Dealer):
@@ -136,38 +146,41 @@ class WithCommunity(Dealer):
callback=None
):
self.tenant_id = tenant_id
self.kb_id = kb_id
self.community_structure = None
self.community_reports = None
self.llm_bdl = llm_bdl
self.embed_bdl = embed_bdl
with RedisDistributedLock(kb_id, 60*60):
self.graph, doc_ids = get_graph(tenant_id, kb_id)
self.callback = callback
async def __call__(self):
with RedisDistributedLock(self.kb_id, 60*60):
self.graph, doc_ids = get_graph(self.tenant_id, self.kb_id)
if not self.graph:
logging.error(f"Faild to fetch the graph. tenant_id:{kb_id}, kb_id:{kb_id}")
if callback:
callback(-1, msg="Faild to fetch the graph.")
logging.error(f"Faild to fetch the graph. tenant_id:{self.kb_id}, kb_id:{self.kb_id}")
if self.callback:
self.callback(-1, msg="Faild to fetch the graph.")
return
if callback:
callback(msg="Fetch the existing graph.")
if self.callback:
self.callback(msg="Fetch the existing graph.")
cr = CommunityReportsExtractor(self.llm_bdl,
get_entity=partial(get_entity, tenant_id, kb_id),
set_entity=partial(set_entity, tenant_id, kb_id, self.embed_bdl),
get_relation=partial(get_relation, tenant_id, kb_id),
set_relation=partial(set_relation, tenant_id, kb_id, self.embed_bdl))
cr = cr(self.graph, callback=callback)
get_entity=partial(get_entity, self.tenant_id, self.kb_id),
set_entity=partial(set_entity, self.tenant_id, self.kb_id, self.embed_bdl),
get_relation=partial(get_relation, self.tenant_id, self.kb_id),
set_relation=partial(set_relation, self.tenant_id, self.kb_id, self.embed_bdl))
cr = await cr(self.graph, callback=self.callback)
self.community_structure = cr.structured_output
self.community_reports = cr.output
set_graph(tenant_id, kb_id, self.graph, doc_ids)
await trio.to_thread.run_sync(lambda: set_graph(self.tenant_id, self.kb_id, self.graph, doc_ids))
if callback:
callback(msg="Graph community extraction is done. Indexing {} reports.".format(len(cr.structured_output)))
if self.callback:
self.callback(msg="Graph community extraction is done. Indexing {} reports.".format(len(cr.structured_output)))
settings.docStoreConn.delete({
await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({
"knowledge_graph_kwd": "community_report",
"kb_id": kb_id
}, search.index_name(tenant_id), kb_id)
"kb_id": self.kb_id
}, search.index_name(self.tenant_id), self.kb_id))
for stru, rep in zip(self.community_structure, self.community_reports):
obj = {
@@ -183,7 +196,7 @@ class WithCommunity(Dealer):
"weight_flt": stru["weight"],
"entities_kwd": stru["entities"],
"important_kwd": stru["entities"],
"kb_id": kb_id,
"kb_id": self.kb_id,
"source_id": doc_ids,
"available_int": 0
}
@@ -193,5 +206,5 @@ class WithCommunity(Dealer):
# chunk["q_%d_vec" % len(ebd[0])] = ebd[0]
#except Exception as e:
# logging.exception(f"Fail to embed entity relation: {e}")
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))
await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(self.tenant_id)))


@@ -16,16 +16,14 @@
import logging
import collections
import os
import re
import traceback
from typing import Any
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import trio
from graphrag.general.extractor import Extractor
from graphrag.general.mind_map_prompt import MIND_MAP_EXTRACTION_PROMPT
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter
from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
@@ -80,63 +78,47 @@ class MindMapExtractor(Extractor):
)
return arr
def __call__(
async def __call__(
self, sections: list[str], prompt_variables: dict[str, Any] | None = None
) -> MindMapResult:
"""Call method definition."""
if prompt_variables is None:
prompt_variables = {}
try:
res = []
max_workers = int(os.environ.get('MINDMAP_EXTRACTOR_MAX_WORKERS', 12))
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
token_count = max(self._llm.max_length * 0.8, self._llm.max_length - 512)
texts = []
cnt = 0
for i in range(len(sections)):
section_cnt = num_tokens_from_string(sections[i])
if cnt + section_cnt >= token_count and texts:
threads.append(exe.submit(self._process_document, "".join(texts), prompt_variables))
texts = []
cnt = 0
texts.append(sections[i])
cnt += section_cnt
if texts:
threads.append(exe.submit(self._process_document, "".join(texts), prompt_variables))
for i, _ in enumerate(threads):
res.append(_.result())
if not res:
return MindMapResult(output={"id": "root", "children": []})
merge_json = reduce(self._merge, res)
if len(merge_json) > 1:
keys = [re.sub(r"\*+", "", k) for k, v in merge_json.items() if isinstance(v, dict)]
keyset = set(i for i in keys if i)
merge_json = {
"id": "root",
"children": [
{
"id": self._key(k),
"children": self._be_children(v, keyset)
}
for k, v in merge_json.items() if isinstance(v, dict) and self._key(k)
]
}
else:
k = self._key(list(merge_json.keys())[0])
merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], {k})}
except Exception as e:
logging.exception("error mind graph")
self._on_error(
e,
traceback.format_exc(), None
)
merge_json = {"error": str(e)}
res = []
token_count = max(self._llm.max_length * 0.8, self._llm.max_length - 512)
texts = []
cnt = 0
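# pack sections into batches of at most ~80% of the model context window,
# then hand each batch to a concurrent _process_document task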
async with trio.open_nursery() as nursery:
for i in range(len(sections)):
section_cnt = num_tokens_from_string(sections[i])
if cnt + section_cnt >= token_count and texts:
nursery.start_soon(self._process_document, "".join(texts), prompt_variables, res)
texts = []
cnt = 0
texts.append(sections[i])
cnt += section_cnt
if texts:
nursery.start_soon(self._process_document, "".join(texts), prompt_variables, res)
if not res:
return MindMapResult(output={"id": "root", "children": []})
merge_json = reduce(self._merge, res)
if len(merge_json) > 1:
keys = [re.sub(r"\*+", "", k) for k, v in merge_json.items() if isinstance(v, dict)]
keyset = set(i for i in keys if i)
merge_json = {
"id": "root",
"children": [
{
"id": self._key(k),
"children": self._be_children(v, keyset)
}
for k, v in merge_json.items() if isinstance(v, dict) and self._key(k)
]
}
else:
k = self._key(list(merge_json.keys())[0])
merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], {k})}
return MindMapResult(output=merge_json)
@@ -181,8 +163,8 @@ class MindMapExtractor(Extractor):
return self._list_to_kv(to_ret)
def _process_document(
self, text: str, prompt_variables: dict[str, str]
async def _process_document(
self, text: str, prompt_variables: dict[str, str], out_res
) -> str:
variables = {
**prompt_variables,
@@ -190,8 +172,9 @@
}
text = perform_variable_replacements(self._mind_map_prompt, variables=variables)
gen_conf = {"temperature": 0.5}
response = self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
async with chat_limiter:
response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf))
response = re.sub(r"```[^\n]*", "", response)
logging.debug(response)
logging.debug(self._todict(markdown_to_json.dictify(response)))
return self._todict(markdown_to_json.dictify(response))
out_res.append(self._todict(markdown_to_json.dictify(response)))


@@ -18,6 +18,7 @@ import argparse
import json
import networkx as nx
import trio
from api import settings
from api.db import LLMType
@@ -54,10 +55,13 @@ if __name__ == "__main__":
embed_bdl = LLMBundle(args.tenant_id, LLMType.EMBEDDING, kb.embd_id)
dealer = Dealer(GraphExtractor, args.tenant_id, kb_id, llm_bdl, chunks, "English", embed_bdl=embed_bdl)
trio.run(dealer)
print(json.dumps(nx.node_link_data(dealer.graph), ensure_ascii=False, indent=2))
dealer = WithResolution(args.tenant_id, kb_id, llm_bdl, embed_bdl)
trio.run(dealer)
dealer = WithCommunity(args.tenant_id, kb_id, llm_bdl, embed_bdl)
trio.run(dealer)
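# each stage runs in its own trio event loop; state flows between stages
# through the persisted graph (set_graph/get_graph)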
print("------------------ COMMUNITY REPORT ----------------------\n", dealer.community_reports)
print(json.dumps(dealer.community_structure, ensure_ascii=False, indent=2))