diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 30f5042ed..1c0ea19b6 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -302,6 +302,20 @@ "model_type": "chat", "is_tools": true }, + { + "llm_name": "qwen-plus-2025-07-28", + "tags": "LLM,CHAT,132k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "qwen-plus-2025-07-14", + "tags": "LLM,CHAT,132k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, { "llm_name": "qwq-plus-latest", "tags": "LLM,CHAT,132k", @@ -309,6 +323,20 @@ "model_type": "chat", "is_tools": true }, + { + "llm_name": "qwen-flash", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "qwen-flash-2025-07-28", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, { "llm_name": "qwen3-coder-480b-a35b-instruct", "tags": "LLM,CHAT,256k", diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py index 8a8655308..61d89e27c 100644 --- a/graphrag/general/extractor.py +++ b/graphrag/general/extractor.py @@ -1,5 +1,5 @@ # -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,17 +14,28 @@ # limitations under the License. # import logging +import os import re -from collections import defaultdict, Counter +from collections import Counter, defaultdict from copy import deepcopy from typing import Callable -import trio + import networkx as nx +import trio from api.utils.api_utils import timeout from graphrag.general.graph_prompt import SUMMARIZE_DESCRIPTIONS_PROMPT -from graphrag.utils import get_llm_cache, set_llm_cache, handle_single_entity_extraction, \ - handle_single_relationship_extraction, split_string_by_multi_markers, flat_uniq_list, chat_limiter, get_from_to, GraphChange +from graphrag.utils import ( + GraphChange, + chat_limiter, + flat_uniq_list, + get_from_to, + get_llm_cache, + handle_single_entity_extraction, + handle_single_relationship_extraction, + set_llm_cache, + split_string_by_multi_markers, +) from rag.llm.chat_model import Base as CompletionLLM from rag.prompts import message_fit_in from rag.utils import truncate @@ -32,6 +43,7 @@ from rag.utils import truncate GRAPH_FIELD_SEP = "" DEFAULT_ENTITY_TYPES = ["organization", "person", "geo", "event", "category"] ENTITY_EXTRACTION_MAX_GLEANINGS = 2 +MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK = int(os.environ.get("MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK", 10)) class Extractor: @@ -47,7 +59,7 @@ class Extractor: self._language = language self._entity_types = entity_types or DEFAULT_ENTITY_TYPES - @timeout(60*20) + @timeout(60 * 20) def _chat(self, system, history, gen_conf={}): hist = deepcopy(history) conf = deepcopy(gen_conf) @@ -55,6 +67,7 @@ class Extractor: if response: return response _, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.92)) + response = "" for attempt in range(3): try: response = self._llm.chat(system_msg[0]["content"], hist, conf) @@ -74,38 +87,37 @@ class Extractor: maybe_edges = defaultdict(list) ent_types = [t.lower() for t in self._entity_types] for record in records: - record_attributes = split_string_by_multi_markers( - record, [tuple_delimiter] - ) + record_attributes = split_string_by_multi_markers(record, 
[tuple_delimiter]) - if_entities = handle_single_entity_extraction( - record_attributes, chunk_key - ) + if_entities = handle_single_entity_extraction(record_attributes, chunk_key) if if_entities is not None and if_entities.get("entity_type", "unknown").lower() in ent_types: maybe_nodes[if_entities["entity_name"]].append(if_entities) continue - if_relation = handle_single_relationship_extraction( - record_attributes, chunk_key - ) + if_relation = handle_single_relationship_extraction(record_attributes, chunk_key) if if_relation is not None: - maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append( - if_relation - ) + maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(if_relation) return dict(maybe_nodes), dict(maybe_edges) - async def __call__( - self, doc_id: str, chunks: list[str], - callback: Callable | None = None - ): - + async def __call__(self, doc_id: str, chunks: list[str], callback: Callable | None = None): self.callback = callback start_ts = trio.current_time() - out_results = [] - async with trio.open_nursery() as nursery: - for i, ck in enumerate(chunks): - ck = truncate(ck, int(self._llm.max_length*0.8)) - nursery.start_soon(self._process_single_content, (doc_id, ck), i, len(chunks), out_results) + + async def extract_all(doc_id, chunks, max_concurrency=MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK): + out_results = [] + limiter = trio.Semaphore(max_concurrency) + + async def worker(chunk_key_dp: tuple[str, str], idx: int, total: int): + async with limiter: + await self._process_single_content(chunk_key_dp, idx, total, out_results) + + async with trio.open_nursery() as nursery: + for i, ck in enumerate(chunks): + nursery.start_soon(worker, (doc_id, ck), i, len(chunks)) + + return out_results + + out_results = await extract_all(doc_id, chunks, max_concurrency=MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK) maybe_nodes = defaultdict(list) maybe_edges = defaultdict(list) @@ -118,7 +130,7 @@ class Extractor: sum_token_count += token_count now = trio.current_time() if callback: - callback(msg = f"Entities and relationships extraction done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {sum_token_count} tokens, {now-start_ts:.2f}s.") + callback(msg=f"Entities and relationships extraction done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {sum_token_count} tokens, {now - start_ts:.2f}s.") start_ts = now logging.info("Entities merging...") all_entities_data = [] @@ -127,7 +139,7 @@ class Extractor: nursery.start_soon(self._merge_nodes, en_nm, ents, all_entities_data) now = trio.current_time() if callback: - callback(msg = f"Entities merging done, {now-start_ts:.2f}s.") + callback(msg=f"Entities merging done, {now - start_ts:.2f}s.") start_ts = now logging.info("Relationships merging...") @@ -137,12 +149,10 @@ class Extractor: nursery.start_soon(self._merge_edges, src, tgt, rels, all_relationships_data) now = trio.current_time() if callback: - callback(msg = f"Relationships merging done, {now-start_ts:.2f}s.") + callback(msg=f"Relationships merging done, {now - start_ts:.2f}s.") if not len(all_entities_data) and not len(all_relationships_data): - logging.warning( - "Didn't extract any entities and relationships, maybe your LLM is not working" - ) + logging.warning("Didn't extract any entities and relationships, maybe your LLM is not working") if not len(all_entities_data): logging.warning("Didn't extract any entities") @@ -155,15 +165,11 @@ class Extractor: if not entities: return entity_type = sorted( - Counter( - [dp["entity_type"] for dp in entities] - 
).items(), + Counter([dp["entity_type"] for dp in entities]).items(), key=lambda x: x[1], reverse=True, )[0][0] - description = GRAPH_FIELD_SEP.join( - sorted(set([dp["description"] for dp in entities])) - ) + description = GRAPH_FIELD_SEP.join(sorted(set([dp["description"] for dp in entities]))) already_source_ids = flat_uniq_list(entities, "source_id") description = await self._handle_entity_relation_summary(entity_name, description) node_data = dict( @@ -174,13 +180,7 @@ class Extractor: node_data["entity_name"] = entity_name all_relationships_data.append(node_data) - async def _merge_edges( - self, - src_id: str, - tgt_id: str, - edges_data: list[dict], - all_relationships_data=None - ): + async def _merge_edges(self, src_id: str, tgt_id: str, edges_data: list[dict], all_relationships_data=None): if not edges_data: return weight = sum([edge["weight"] for edge in edges_data]) @@ -188,14 +188,7 @@ class Extractor: description = await self._handle_entity_relation_summary(f"{src_id} -> {tgt_id}", description) keywords = flat_uniq_list(edges_data, "keywords") source_id = flat_uniq_list(edges_data, "source_id") - edge_data = dict( - src_id=src_id, - tgt_id=tgt_id, - description=description, - keywords=keywords, - weight=weight, - source_id=source_id - ) + edge_data = dict(src_id=src_id, tgt_id=tgt_id, description=description, keywords=keywords, weight=weight, source_id=source_id) all_relationships_data.append(edge_data) async def _merge_graph_nodes(self, graph: nx.Graph, nodes: list[str], change: GraphChange): @@ -231,14 +224,10 @@ class Extractor: node0_attrs["description"] = await self._handle_entity_relation_summary(nodes[0], node0_attrs["description"]) graph.nodes[nodes[0]].update(node0_attrs) - async def _handle_entity_relation_summary( - self, - entity_or_relation_name: str, - description: str - ) -> str: + async def _handle_entity_relation_summary(self, entity_or_relation_name: str, description: str) -> str: summary_max_tokens = 512 use_description = truncate(description, summary_max_tokens) - description_list=use_description.split(GRAPH_FIELD_SEP), + description_list = (use_description.split(GRAPH_FIELD_SEP),) if len(description_list) <= 12: return use_description prompt_template = SUMMARIZE_DESCRIPTIONS_PROMPT @@ -250,5 +239,5 @@ class Extractor: use_prompt = prompt_template.format(**context_base) logging.info(f"Trigger summary: {entity_or_relation_name}") async with chat_limiter: - summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}])) + summary = await trio.to_thread.run_sync(self._chat, "", [{"role": "user", "content": use_prompt}]) return summary diff --git a/graphrag/general/index.py b/graphrag/general/index.py index e5150c54a..9e80309f2 100644 --- a/graphrag/general/index.py +++ b/graphrag/general/index.py @@ -1,5 +1,5 @@ # -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
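The extract_all helper introduced in extractor.py above caps concurrent chunk extraction with a trio.Semaphore sized from the new MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK environment variable, while the nursery still waits for every worker to finish. A minimal, self-contained sketch of that pattern follows; the process_chunk stand-in and the sleep are illustrative assumptions, not code from this patch:

```python
import os

import trio


async def process_chunk(chunk_key_dp, idx, total, out_results):
    # Stand-in for Extractor._process_single_content (illustrative assumption).
    await trio.sleep(0.1)  # pretend this is the LLM round-trip
    out_results.append((chunk_key_dp[0], idx, total))


async def extract_all(doc_id, chunks, max_concurrency=int(os.environ.get("MAX_CONCURRENT_PROCESS_AND_EXTRACT_CHUNK", 10))):
    out_results = []
    limiter = trio.Semaphore(max_concurrency)  # at most max_concurrency chunks in flight

    async def worker(chunk_key_dp, idx, total):
        async with limiter:
            await process_chunk(chunk_key_dp, idx, total, out_results)

    async with trio.open_nursery() as nursery:  # nursery exit waits for all workers
        for i, ck in enumerate(chunks):
            nursery.start_soon(worker, (doc_id, ck), i, len(chunks))
    return out_results


if __name__ == "__main__":
    print(trio.run(extract_all, "doc-1", ["chunk a", "chunk b", "chunk c"]))
```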
@@ -23,25 +23,24 @@ import trio from api import settings from api.utils import get_uuid from api.utils.api_utils import timeout -from graphrag.light.graph_extractor import GraphExtractor as LightKGExt -from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt -from graphrag.general.community_reports_extractor import CommunityReportsExtractor from graphrag.entity_resolution import EntityResolution +from graphrag.general.community_reports_extractor import CommunityReportsExtractor from graphrag.general.extractor import Extractor +from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt +from graphrag.light.graph_extractor import GraphExtractor as LightKGExt from graphrag.utils import ( - graph_merge, - get_graph, - set_graph, + GraphChange, chunk_id, does_graph_contains, + get_graph, + graph_merge, + set_graph, tidy_graph, - GraphChange, ) from rag.nlp import rag_tokenizer, search from rag.utils.redis_conn import RedisDistributedLock - async def run_graphrag( row: dict, language, @@ -51,20 +50,16 @@ async def run_graphrag( embedding_model, callback, ): - enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION") + enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION") start = trio.current_time() tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"] chunks = [] - for d in settings.retrievaler.chunk_list( - doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"] - ): + for d in settings.retrievaler.chunk_list(doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"]): chunks.append(d["content_with_weight"]) - with trio.fail_after(max(120, len(chunks)*60*10) if enable_timeout_assertion else 10000000000): + with trio.fail_after(max(120, len(chunks) * 60 * 10) if enable_timeout_assertion else 10000000000): subgraph = await generate_subgraph( - LightKGExt - if "method" not in row["kb_parser_config"].get("graphrag", {}) or row["kb_parser_config"]["graphrag"]["method"] != "general" - else GeneralKGExt, + LightKGExt if "method" not in row["kb_parser_config"].get("graphrag", {}) or row["kb_parser_config"]["graphrag"]["method"] != "general" else GeneralKGExt, tenant_id, kb_id, doc_id, @@ -177,9 +172,7 @@ async def generate_subgraph( subgraph.graph["source_id"] = [doc_id] chunk = { - "content_with_weight": json.dumps( - nx.node_link_data(subgraph, edges="edges"), ensure_ascii=False - ), + "content_with_weight": json.dumps(nx.node_link_data(subgraph, edges="edges"), ensure_ascii=False), "knowledge_graph_kwd": "subgraph", "kb_id": kb_id, "source_id": [doc_id], @@ -187,22 +180,14 @@ async def generate_subgraph( "removed_kwd": "N", } cid = chunk_id(chunk) - await trio.to_thread.run_sync( - lambda: settings.docStoreConn.delete( - {"knowledge_graph_kwd": "subgraph", "source_id": doc_id}, search.index_name(tenant_id), kb_id - ) - ) - await trio.to_thread.run_sync( - lambda: settings.docStoreConn.insert( - [{"id": cid, **chunk}], search.index_name(tenant_id), kb_id - ) - ) + await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": "subgraph", "source_id": doc_id}, search.index_name(tenant_id), kb_id) + await trio.to_thread.run_sync(settings.docStoreConn.insert, [{"id": cid, **chunk}], search.index_name(tenant_id), kb_id) now = trio.current_time() callback(msg=f"generated subgraph for doc {doc_id} in {now - start:.2f} seconds.") return subgraph -@timeout(60*3) +@timeout(60 * 3) async def merge_subgraph( tenant_id: str, kb_id: str, @@ -228,13 +213,11 @@ async def merge_subgraph( 
await set_graph(tenant_id, kb_id, embedding_model, new_graph, change, callback) now = trio.current_time() - callback( - msg=f"merging subgraph for doc {doc_id} into the global graph done in {now - start:.2f} seconds." - ) + callback(msg=f"merging subgraph for doc {doc_id} into the global graph done in {now - start:.2f} seconds.") return new_graph -@timeout(60*30, 1) +@timeout(60 * 30, 1) async def resolve_entities( graph, subgraph_nodes: set[str], @@ -260,7 +243,7 @@ async def resolve_entities( callback(msg=f"Graph resolution done in {now - start:.2f}s.") -@timeout(60*30, 1) +@timeout(60 * 30, 1) async def extract_community( graph, tenant_id: str, @@ -280,9 +263,7 @@ async def extract_community( doc_ids = graph.graph["source_id"] now = trio.current_time() - callback( - msg=f"Graph extracted {len(cr.structured_output)} communities in {now - start:.2f}s." - ) + callback(msg=f"Graph extracted {len(cr.structured_output)} communities in {now - start:.2f}s.") start = now chunks = [] for stru, rep in zip(community_structure, community_reports): @@ -295,9 +276,7 @@ async def extract_community( "docnm_kwd": stru["title"], "title_tks": rag_tokenizer.tokenize(stru["title"]), "content_with_weight": json.dumps(obj, ensure_ascii=False), - "content_ltks": rag_tokenizer.tokenize( - obj["report"] + " " + obj["evidences"] - ), + "content_ltks": rag_tokenizer.tokenize(obj["report"] + " " + obj["evidences"]), "knowledge_graph_kwd": "community_report", "weight_flt": stru["weight"], "entities_kwd": stru["entities"], @@ -306,9 +285,7 @@ async def extract_community( "source_id": list(doc_ids), "available_int": 0, } - chunk["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize( - chunk["content_ltks"] - ) + chunk["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(chunk["content_ltks"]) chunks.append(chunk) await trio.to_thread.run_sync( @@ -320,13 +297,11 @@ async def extract_community( ) es_bulk_size = 4 for b in range(0, len(chunks), es_bulk_size): - doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(tenant_id), kb_id)) + doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id)) if doc_store_result: error_message = f"Insert chunk error: {doc_store_result}, please check log file and Elasticsearch/Infinity status!" raise Exception(error_message) now = trio.current_time() - callback( - msg=f"Graph indexed {len(cr.structured_output)} communities in {now - start:.2f}s." - ) + callback(msg=f"Graph indexed {len(cr.structured_output)} communities in {now - start:.2f}s.") return community_structure, community_reports diff --git a/graphrag/light/__init__.py b/graphrag/light/__init__.py index e69de29bb..177b91dd0 100644 --- a/graphrag/light/__init__.py +++ b/graphrag/light/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/graphrag/light/graph_extractor.py b/graphrag/light/graph_extractor.py index 9c869b16d..474d47597 100644 --- a/graphrag/light/graph_extractor.py +++ b/graphrag/light/graph_extractor.py @@ -4,17 +4,21 @@ Reference: - [graphrag](https://github.com/microsoft/graphrag) """ + +import logging import re -from typing import Any from dataclasses import dataclass -from graphrag.general.extractor import Extractor, ENTITY_EXTRACTION_MAX_GLEANINGS -from graphrag.light.graph_prompt import PROMPTS -from graphrag.utils import pack_user_ass_to_openai_messages, split_string_by_multi_markers, chat_limiter -from rag.llm.chat_model import Base as CompletionLLM +from typing import Any + import networkx as nx -from rag.utils import num_tokens_from_string import trio +from graphrag.general.extractor import ENTITY_EXTRACTION_MAX_GLEANINGS, Extractor +from graphrag.light.graph_prompt import PROMPTS +from graphrag.utils import chat_limiter, pack_user_ass_to_openai_messages, split_string_by_multi_markers +from rag.llm.chat_model import Base as CompletionLLM +from rag.utils import num_tokens_from_string + @dataclass class GraphExtractionResult: @@ -25,7 +29,6 @@ class GraphExtractionResult: class GraphExtractor(Extractor): - _max_gleanings: int def __init__( @@ -38,15 +41,9 @@ class GraphExtractor(Extractor): ): super().__init__(llm_invoker, language, entity_types) """Init method definition.""" - self._max_gleanings = ( - max_gleanings - if max_gleanings is not None - else ENTITY_EXTRACTION_MAX_GLEANINGS - ) + self._max_gleanings = max_gleanings if max_gleanings is not None else ENTITY_EXTRACTION_MAX_GLEANINGS self._example_number = example_number - examples = "\n".join( - PROMPTS["entity_extraction_examples"][: int(self._example_number)] - ) + examples = "\n".join(PROMPTS["entity_extraction_examples"][: int(self._example_number)]) example_context_base = dict( tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"], @@ -68,45 +65,52 @@ class GraphExtractor(Extractor): language=self._language, ) - self._continue_prompt = PROMPTS["entiti_continue_extraction"] - self._if_loop_prompt = PROMPTS["entiti_if_loop_extraction"] + self._continue_prompt = PROMPTS["entity_continue_extraction"].format(**self._context_base) + self._if_loop_prompt = PROMPTS["entity_if_loop_extraction"] - self._left_token_count = llm_invoker.max_length - num_tokens_from_string( - self._entity_extract_prompt.format( - **self._context_base, input_text="{input_text}" - ).format(**self._context_base, input_text="") - ) + self._left_token_count = llm_invoker.max_length - num_tokens_from_string(self._entity_extract_prompt.format(**self._context_base, input_text="")) self._left_token_count = max(llm_invoker.max_length * 0.6, self._left_token_count) async def _process_single_content(self, chunk_key_dp: tuple[str, str], chunk_seq: int, num_chunks: int, out_results): token_count = 0 chunk_key = chunk_key_dp[0] content = chunk_key_dp[1] - hint_prompt = self._entity_extract_prompt.format( - **self._context_base, input_text="{input_text}" - ).format(**self._context_base, input_text=content) + hint_prompt = self._entity_extract_prompt.format(**self._context_base, input_text=content) gen_conf = {} + final_result = "" + glean_result = "" + if_loop_result = "" + history = [] + logging.info(f"Start processing for {chunk_key}: {content[:25]}...") + if self.callback: + self.callback(msg=f"Start processing for {chunk_key}: {content[:25]}...") async with chat_limiter: - final_result = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", 
"content": "Output:"}], gen_conf)) + final_result = await trio.to_thread.run_sync(self._chat, "", [{"role": "user", "content": hint_prompt}], gen_conf) token_count += num_tokens_from_string(hint_prompt + final_result) - history = pack_user_ass_to_openai_messages("Output:", final_result, self._continue_prompt) + history = pack_user_ass_to_openai_messages(hint_prompt, final_result, self._continue_prompt) for now_glean_index in range(self._max_gleanings): async with chat_limiter: - glean_result = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, history, gen_conf)) - history.extend([{"role": "assistant", "content": glean_result}, {"role": "user", "content": self._continue_prompt}]) + # glean_result = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, history, gen_conf)) + glean_result = await trio.to_thread.run_sync(self._chat, "", history, gen_conf) + history.extend([{"role": "assistant", "content": glean_result}]) token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + hint_prompt + self._continue_prompt) final_result += glean_result if now_glean_index == self._max_gleanings - 1: break + history.extend([{"role": "user", "content": self._if_loop_prompt}]) async with chat_limiter: - if_loop_result = await trio.to_thread.run_sync(lambda: self._chat(self._if_loop_prompt, history, gen_conf)) + if_loop_result = await trio.to_thread.run_sync(self._chat, "", history, gen_conf) token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + if_loop_result + self._if_loop_prompt) if_loop_result = if_loop_result.strip().strip('"').strip("'").lower() if if_loop_result != "yes": break + history.extend([{"role": "assistant", "content": if_loop_result}, {"role": "user", "content": self._continue_prompt}]) + logging.info(f"Completed processing for {chunk_key}: {content[:25]}... after {now_glean_index} gleanings, {token_count} tokens.") + if self.callback: + self.callback(msg=f"Completed processing for {chunk_key}: {content[:25]}... 
after {now_glean_index} gleanings, {token_count} tokens.") records = split_string_by_multi_markers( final_result, [self._context_base["record_delimiter"], self._context_base["completion_delimiter"]], @@ -121,4 +125,7 @@ class GraphExtractor(Extractor): maybe_nodes, maybe_edges = self._entities_and_relations(chunk_key, records, self._context_base["tuple_delimiter"]) out_results.append((maybe_nodes, maybe_edges, token_count)) if self.callback: - self.callback(0.5+0.1*len(out_results)/num_chunks, msg = f"Entities extraction of chunk {chunk_seq} {len(out_results)}/{num_chunks} done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {token_count} tokens.") + self.callback( + 0.5 + 0.1 * len(out_results) / num_chunks, + msg=f"Entities extraction of chunk {chunk_seq} {len(out_results)}/{num_chunks} done, {len(maybe_nodes)} nodes, {len(maybe_edges)} edges, {token_count} tokens.", + ) diff --git a/graphrag/light/graph_prompt.py b/graphrag/light/graph_prompt.py index a3bf8c44c..865937afb 100644 --- a/graphrag/light/graph_prompt.py +++ b/graphrag/light/graph_prompt.py @@ -4,26 +4,28 @@ Reference: - [LightRAG](https://github.com/HKUDS/LightRAG/blob/main/lightrag/prompt.py) """ +from typing import Any -PROMPTS = {} +PROMPTS: dict[str, Any] = {} PROMPTS["DEFAULT_LANGUAGE"] = "English" PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>" PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##" PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>" -PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"] -PROMPTS["entity_extraction"] = """-Goal- +PROMPTS["DEFAULT_USER_PROMPT"] = "n/a" + +PROMPTS["entity_extraction"] = """---Goal--- Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities. Use {language} as output language. --Steps- +---Steps--- 1. Identify all entities. For each identified entity, extract the following information: - entity_name: Name of the entity, use same language as input text. If English, capitalized the name. - entity_type: One of the following types: [{entity_types}] -- entity_description: Comprehensive description of the entity's attributes and activities +- entity_description: Provide a comprehensive description of the entity's attributes and activities *based solely on the information present in the input text*. **Do not infer or hallucinate information not explicitly stated.** If the text provides insufficient information to create a comprehensive description, state "Description not available in text." Format each entity as ("entity"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}) 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other. @@ -43,31 +45,34 @@ Format the content-level key words as ("content_keywords"{tuple_delimiter}` + - For a Knowledge Graph Relationship: `[KG] - ` + - For a Document Chunk: `[DC] ` + +---USER CONTEXT--- +- Additional user prompt: {user_prompt} + + +Response:""" + +PROMPTS["keywords_extraction"] = """---Role--- +You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval. 
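The rewritten keywords_extraction prompt, which continues below, instructs the model to return nothing but a JSON object with high_level_keywords and low_level_keywords lists, and to fall back to empty lists for vague or nonsensical queries. A defensive caller-side parse might look like this sketch; the parse_keywords name and the fence-stripping fallback are assumptions for illustration, not part of this patch:

```python
import json
import re


def parse_keywords(raw: str) -> tuple[list[str], list[str]]:
    """Parse the JSON object the keywords_extraction prompt asks for.

    Strips ```json fences in case the model ignores the "no markdown" constraint,
    and returns empty lists on any failure, mirroring the prompt's edge-case rule.
    """
    text = raw.strip()
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)
    try:
        obj = json.loads(text)
    except json.JSONDecodeError:
        return [], []
    high = [k for k in obj.get("high_level_keywords", []) if isinstance(k, str)]
    low = [k for k in obj.get("low_level_keywords", []) if isinstance(k, str)]
    return high, low
```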
+ +---Goal--- +Given a user query, your task is to extract two distinct types of keywords: +1. **high_level_keywords**: for overarching concepts or themes, capturing user's core intent, the subject area, or the type of question being asked. +2. **low_level_keywords**: for specific entities or details, identifying the specific entities, proper nouns, technical jargon, product names, or concrete items. + +---Instructions & Constraints--- +1. **Output Format**: Your output MUST be a valid JSON object and nothing else. Do not include any explanatory text, markdown code fences (like ```json), or any other text before or after the JSON. It will be parsed directly by a JSON parser. +2. **Source of Truth**: All keywords must be explicitly derived from the user query, with both high-level and low-level keyword categories required to contain content. +3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple". +4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types. + +---Examples--- +{examples} + +---Real Data--- +User Query: {query} + +---Output--- +""" + +PROMPTS["keywords_extraction_examples"] = [ + """Example 1: + +Query: "How does international trade influence global economic stability?" + +Output: +{ + "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"], + "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"] +} + +""", + """Example 2: + +Query: "What are the environmental consequences of deforestation on biodiversity?" + +Output: +{ + "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"], + "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"] +} + +""", + """Example 3: + +Query: "What is the role of education in reducing poverty?" + +Output: +{ + "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"], + "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"] +} + +""", +] PROMPTS["naive_rag_response"] = """---Role--- -You are a helpful assistant responding to questions about documents provided. - +You are a helpful assistant responding to user query about Document Chunks provided provided in JSON format below. ---Goal--- -Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge. -If you don't know the answer, just say so. Do not make anything up. -Do not include information where the supporting evidence for it is not provided. +Generate a concise response based on Document Chunks and follow Response Rules, considering both the conversation history and the current query. Summarize all information in the provided Document Chunks, and incorporating general knowledge relevant to the Document Chunks. Do not include information not provided by Document Chunks. -When handling content with timestamps: -1. 
Each piece of content has a "created_at" timestamp indicating when we acquired this knowledge -2. When encountering conflicting information, consider both the content and the timestamp -3. Don't automatically prefer the most recent content - use judgment based on the context -4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps - ----Target response length and format--- - -{response_type} - ----Documents--- +---Conversation History--- +{history} +---Document Chunks(DC)--- {content_data} -Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown. -""" - -PROMPTS[ - "similarity_check" -] = """Please analyze the similarity between these two questions: - -Question 1: {original_prompt} -Question 2: {cached_prompt} - -Please evaluate the following two points and provide a similarity score between 0 and 1 directly: -1. Whether these two questions are semantically similar -2. Whether the answer to Question 2 can be used to answer Question 1 -Similarity score criteria: -0: Completely unrelated or answer cannot be reused, including but not limited to: - - The questions have different topics - - The locations mentioned in the questions are different - - The times mentioned in the questions are different - - The specific individuals mentioned in the questions are different - - The specific events mentioned in the questions are different - - The background information in the questions is different - - The key conditions in the questions are different -1: Identical and answer can be directly reused -0.5: Partially related and answer needs modification to be used -Return only a number between 0-1, without any additional content. -""" - -PROMPTS["mix_rag_response"] = """---Role--- - -You are a professional assistant responsible for answering questions based on knowledge graph and textual information. Please respond in the same language as the user's question. - ----Goal--- - -Generate a concise response that summarizes relevant points from the provided information. If you don't know the answer, just say so. Do not make anything up or include information where the supporting evidence is not provided. - -When handling information with timestamps: -1. Each piece of information (both relationships and content) has a "created_at" timestamp indicating when we acquired this knowledge -2. When encountering conflicting information, consider both the content/relationship and the timestamp -3. Don't automatically prefer the most recent information - use judgment based on the context -4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps - ----Data Sources--- - -1. Knowledge Graph Data: -{kg_context} - -2. Vector Data: -{vector_context} - ----Response Requirements--- +---RESPONSE GUIDELINES--- +**1. Content & Adherence:** +- Strictly adhere to the provided context from the Knowledge Base. Do not invent, assume, or include any information not present in the source data. +- If the answer cannot be found in the provided context, state that you do not have enough information to answer. +- Ensure the response maintains continuity with the conversation history. +**2. Formatting & Language:** +- Format the response using markdown with appropriate section headings. +- The response language must match the user's question language. 
- Target format and length: {response_type} -- Use markdown formatting with appropriate section headings -- Aim to keep content around 3 paragraphs for conciseness -- Each paragraph should be under a relevant section heading -- Each section should focus on one main point or aspect of the answer -- Use clear and descriptive section titles that reflect the content -- List up to 5 most important reference sources at the end under "References", clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (VD) - Format: [KG/VD] Source content -Add sections and commentary to the response as appropriate for the length and format. If the provided information is insufficient to answer the question, clearly state that you don't know or cannot provide an answer in the same language as the user's question.""" +**3. Citations / References:** +- At the end of the response, under a "References" section, cite a maximum of 5 most relevant sources used. +- Use the following formats for citations: `[DC] ` + +---USER CONTEXT--- +- Additional user prompt: {user_prompt} + + +Response:""" diff --git a/graphrag/utils.py b/graphrag/utils.py index fbe391f8f..6b80d7fe8 100644 --- a/graphrag/utils.py +++ b/graphrag/utils.py @@ -6,27 +6,27 @@ Reference: - [LightRag](https://github.com/HKUDS/LightRAG) """ +import dataclasses import html import json import logging +import os import re import time from collections import defaultdict from hashlib import md5 -from typing import Any, Callable -import os -import trio -from typing import Set, Tuple +from typing import Any, Callable, Set, Tuple + import networkx as nx import numpy as np +import trio import xxhash from networkx.readwrite import json_graph -import dataclasses -from api.utils.api_utils import timeout from api import settings from api.utils import get_uuid -from rag.nlp import search, rag_tokenizer +from api.utils.api_utils import timeout +from rag.nlp import rag_tokenizer, search from rag.utils.doc_store_conn import OrderByExpr from rag.utils.redis_conn import REDIS_CONN @@ -34,7 +34,8 @@ GRAPH_FIELD_SEP = "" ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None] -chat_limiter = trio.CapacityLimiter(int(os.environ.get('MAX_CONCURRENT_CHATS', 10))) +chat_limiter = trio.CapacityLimiter(int(os.environ.get("MAX_CONCURRENT_CHATS", 10))) + @dataclasses.dataclass class GraphChange: @@ -43,9 +44,8 @@ class GraphChange: removed_edges: Set[Tuple[str, str]] = dataclasses.field(default_factory=set) added_updated_edges: Set[Tuple[str, str]] = dataclasses.field(default_factory=set) -def perform_variable_replacements( - input: str, history: list[dict] | None = None, variables: dict | None = None -) -> str: + +def perform_variable_replacements(input: str, history: list[dict] | None = None, variables: dict | None = None) -> str: """Perform variable replacements on the input string and in a chat log.""" if history is None: history = [] @@ -78,9 +78,7 @@ def clean_str(input: Any) -> str: return re.sub(r"[\"\x00-\x1f\x7f-\x9f]", "", result) -def dict_has_keys_with_types( - data: dict, expected_fields: list[tuple[str, type]] -) -> bool: +def dict_has_keys_with_types(data: dict, expected_fields: list[tuple[str, type]]) -> bool: """Return True if the given dictionary has the given keys with the given types.""" for field, field_type in expected_fields: if field not in data: @@ -102,7 +100,7 @@ def get_llm_cache(llmnm, txt, history, genconf): k = hasher.hexdigest() bin = REDIS_CONN.get(k) if not bin: - return + return None return bin @@ 
-114,7 +112,7 @@ def set_llm_cache(llmnm, txt, v, history, genconf): hasher.update(str(genconf).encode("utf-8")) k = hasher.hexdigest() - REDIS_CONN.set(k, v.encode("utf-8"), 24*3600) + REDIS_CONN.set(k, v.encode("utf-8"), 24 * 3600) def get_embed_cache(llmnm, txt): @@ -136,7 +134,7 @@ def set_embed_cache(llmnm, txt, arr): k = hasher.hexdigest() arr = json.dumps(arr.tolist() if isinstance(arr, np.ndarray) else arr) - REDIS_CONN.set(k, arr.encode("utf-8"), 24*3600) + REDIS_CONN.set(k, arr.encode("utf-8"), 24 * 3600) def get_tags_from_cache(kb_ids): @@ -162,6 +160,7 @@ def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True): """ Ensure all nodes and edges in the graph have some essential attribute. """ + def is_valid_item(node_attrs: dict) -> bool: valid_node = True for attr in ["description", "source_id"]: @@ -169,6 +168,7 @@ def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True): valid_node = False break return valid_node + if check_attribute: purged_nodes = [] for node, node_attrs in graph.nodes(data=True): @@ -267,9 +267,7 @@ def handle_single_relationship_extraction(record_attributes: list[str], chunk_ke edge_keywords = clean_str(record_attributes[4]) edge_source_id = chunk_key - weight = ( - float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0 - ) + weight = float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0 pair = sorted([source.upper(), target.upper()]) return dict( src_id=pair[0], @@ -284,9 +282,7 @@ def handle_single_relationship_extraction(record_attributes: list[str], chunk_ke def pack_user_ass_to_openai_messages(*args: str): roles = ["user", "assistant"] - return [ - {"role": roles[i % 2], "content": content} for i, content in enumerate(args) - ] + return [{"role": roles[i % 2], "content": content} for i, content in enumerate(args)] def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]: @@ -307,7 +303,7 @@ def chunk_id(chunk): async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks): global chat_limiter - enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION") + enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION") chunk = { "id": get_uuid(), "important_kwd": [ent_name], @@ -319,7 +315,7 @@ async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks): "content_ltks": rag_tokenizer.tokenize(meta["description"]), "source_id": meta["source_id"], "kb_id": kb_id, - "available_int": 0 + "available_int": 0, } chunk["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(chunk["content_ltks"]) ebd = get_embed_cache(embd_mdl.llm_name, ent_name) @@ -343,13 +339,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1): to_ent_name = [to_ent_name] ents.extend(to_ent_name) ents = list(set(ents)) - conds = { - "fields": ["content_with_weight"], - "size": size, - "from_entity_kwd": ents, - "to_entity_kwd": ents, - "knowledge_graph_kwd": ["relation"] - } + conds = {"fields": ["content_with_weight"], "size": size, "from_entity_kwd": ents, "to_entity_kwd": ents, "knowledge_graph_kwd": ["relation"]} res = [] es_res = settings.retrievaler.search(conds, search.index_name(tenant_id), [kb_id] if isinstance(kb_id, str) else kb_id) for id in es_res.ids: @@ -363,7 +353,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1): async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks): - enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION") + 
enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION") chunk = { "id": get_uuid(), "from_entity_kwd": from_ent_name, @@ -375,7 +365,7 @@ async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, "source_id": meta["source_id"], "weight_int": int(meta["weight"]), "kb_id": kb_id, - "available_int": 0 + "available_int": 0, } chunk["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(chunk["content_ltks"]) txt = f"{from_ent_name}->{to_ent_name}" @@ -383,7 +373,7 @@ async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, if ebd is None: async with chat_limiter: with trio.fail_after(3 if enable_timeout_assertion else 300000000): - ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([txt+f": {meta['description']}"])) + ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([txt + f": {meta['description']}"])) ebd = ebd[0] set_embed_cache(embd_mdl.llm_name, txt, ebd) assert ebd is not None @@ -407,12 +397,7 @@ async def does_graph_contains(tenant_id, kb_id, doc_id): async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]: - conds = { - "fields": ["source_id"], - "removed_kwd": "N", - "size": 1, - "knowledge_graph_kwd": ["graph"] - } + conds = {"fields": ["source_id"], "removed_kwd": "N", "size": 1, "knowledge_graph_kwd": ["graph"]} res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search(conds, search.index_name(tenant_id), [kb_id])) doc_ids = [] if res.total == 0: @@ -423,12 +408,8 @@ async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]: async def get_graph(tenant_id, kb_id, exclude_rebuild=None): - conds = { - "fields": ["content_with_weight", "removed_kwd", "source_id"], - "size": 1, - "knowledge_graph_kwd": ["graph"] - } - res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search(conds, search.index_name(tenant_id), [kb_id])) + conds = {"fields": ["content_with_weight", "removed_kwd", "source_id"], "size": 1, "knowledge_graph_kwd": ["graph"]} + res = await trio.to_thread.run_sync(settings.retrievaler.search, conds, search.index_name(tenant_id), [kb_id]) if not res.total == 0: for id in res.ids: try: @@ -449,56 +430,63 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang global chat_limiter start = trio.current_time() - await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph"]}, search.index_name(tenant_id), kb_id)) + await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": ["graph", "subgraph"]}, search.index_name(tenant_id), kb_id) if change.removed_nodes: - await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["entity"], "entity_kwd": sorted(change.removed_nodes)}, search.index_name(tenant_id), kb_id)) - + await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": ["entity"], "entity_kwd": sorted(change.removed_nodes)}, search.index_name(tenant_id), kb_id) if change.removed_edges: + async def del_edges(from_node, to_node): async with chat_limiter: - await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id)) + await trio.to_thread.run_sync( + settings.docStoreConn.delete, {"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id + ) + async with trio.open_nursery() as nursery: for 
from_node, to_node in change.removed_edges: - nursery.start_soon(del_edges, from_node, to_node) + nursery.start_soon(del_edges, from_node, to_node) now = trio.current_time() if callback: callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.") start = now - chunks = [{ - "id": get_uuid(), - "content_with_weight": json.dumps(nx.node_link_data(graph, edges="edges"), ensure_ascii=False), - "knowledge_graph_kwd": "graph", - "kb_id": kb_id, - "source_id": graph.graph.get("source_id", []), - "available_int": 0, - "removed_kwd": "N" - }] - + chunks = [ + { + "id": get_uuid(), + "content_with_weight": json.dumps(nx.node_link_data(graph, edges="edges"), ensure_ascii=False), + "knowledge_graph_kwd": "graph", + "kb_id": kb_id, + "source_id": graph.graph.get("source_id", []), + "available_int": 0, + "removed_kwd": "N", + } + ] + # generate updated subgraphs for source in graph.graph["source_id"]: subgraph = graph.subgraph([n for n in graph.nodes if source in graph.nodes[n]["source_id"]]).copy() subgraph.graph["source_id"] = [source] for n in subgraph.nodes: subgraph.nodes[n]["source_id"] = [source] - chunks.append({ - "id": get_uuid(), - "content_with_weight": json.dumps(nx.node_link_data(subgraph, edges="edges"), ensure_ascii=False), - "knowledge_graph_kwd": "subgraph", - "kb_id": kb_id, - "source_id": [source], - "available_int": 0, - "removed_kwd": "N" - }) + chunks.append( + { + "id": get_uuid(), + "content_with_weight": json.dumps(nx.node_link_data(subgraph, edges="edges"), ensure_ascii=False), + "knowledge_graph_kwd": "subgraph", + "kb_id": kb_id, + "source_id": [source], + "available_int": 0, + "removed_kwd": "N", + } + ) async with trio.open_nursery() as nursery: for ii, node in enumerate(change.added_updated_nodes): node_attrs = graph.nodes[node] nursery.start_soon(graph_node_to_chunk, kb_id, embd_mdl, node, node_attrs, chunks) - if ii%100 == 9 and callback: + if ii % 100 == 9 and callback: callback(msg=f"Get embedding of nodes: {ii}/{len(change.added_updated_nodes)}") async with trio.open_nursery() as nursery: @@ -508,7 +496,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang # added_updated_edges could record a non-existing edge if both from_node and to_node participate in nodes merging. 
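Several hunks in this patch, including the set_graph changes here and similar ones in extractor.py and index.py, swap `lambda: fn(...)` closures for direct positional-argument calls to trio.to_thread.run_sync. A small sketch of the two styles, with blocking_delete as a made-up stand-in for the synchronous doc-store call:

```python
import trio


def blocking_delete(cond: dict, index: str, kb_id: str) -> str:
    # Stand-in for a synchronous doc-store call (illustrative only).
    return f"deleted where {cond} in {index}/{kb_id}"


async def main():
    cond = {"knowledge_graph_kwd": ["relation"]}
    # Lambda style: the closure captures variables by reference, which can bite
    # inside loops where the loop variable changes before the thread runs.
    r1 = await trio.to_thread.run_sync(lambda: blocking_delete(cond, "idx_t1", "kb1"))
    # Positional style used by this patch: arguments are bound at call time.
    r2 = await trio.to_thread.run_sync(blocking_delete, cond, "idx_t1", "kb1")
    print(r1, r2, sep="\n")


trio.run(main)
```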
continue nursery.start_soon(graph_edge_to_chunk, kb_id, embd_mdl, from_node, to_node, edge_attrs, chunks) - if ii%100 == 9 and callback: + if ii % 100 == 9 and callback: callback(msg=f"Get embedding of edges: {ii}/{len(change.added_updated_edges)}") now = trio.current_time() @@ -516,11 +504,11 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang callback(msg=f"set_graph converted graph change to {len(chunks)} chunks in {now - start:.2f}s.") start = now - enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION") + enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION") es_bulk_size = 4 for b in range(0, len(chunks), es_bulk_size): with trio.fail_after(3 if enable_timeout_assertion else 30000000): - doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(tenant_id), kb_id)) + doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id)) if b % 100 == es_bulk_size and callback: callback(msg=f"Insert chunks: {b}/{len(chunks)}") if doc_store_result: @@ -544,10 +532,10 @@ def is_continuous_subsequence(subseq, seq): break return indexes - index_list = find_all_indexes(seq,subseq[0]) + index_list = find_all_indexes(seq, subseq[0]) for idx in index_list: - if idx!=len(seq)-1: - if seq[idx+1]==subseq[-1]: + if idx != len(seq) - 1: + if seq[idx + 1] == subseq[-1]: return True return False @@ -574,10 +562,7 @@ def merge_tuples(list1, list2): async def get_entity_type2sampels(idxnms, kb_ids: list): - es_res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, - "size": 10000, - "fields": ["content_with_weight"]}, - idxnms, kb_ids)) + es_res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]}, idxnms, kb_ids)) res = defaultdict(list) for id in es_res.ids: @@ -609,13 +594,10 @@ async def rebuild_graph(tenant_id, kb_id, exclude_rebuild=None): graph = nx.Graph() flds = ["knowledge_graph_kwd", "content_with_weight", "source_id"] bs = 256 - for i in range(0, 1024*bs, bs): - es_res = await trio.to_thread.run_sync(lambda: settings.docStoreConn.search(flds, [], - {"kb_id": kb_id, "knowledge_graph_kwd": ["subgraph"]}, - [], - OrderByExpr(), - i, bs, search.index_name(tenant_id), [kb_id] - )) + for i in range(0, 1024 * bs, bs): + es_res = await trio.to_thread.run_sync( + lambda: settings.docStoreConn.search(flds, [], {"kb_id": kb_id, "knowledge_graph_kwd": ["subgraph"]}, [], OrderByExpr(), i, bs, search.index_name(tenant_id), [kb_id]) + ) # tot = settings.docStoreConn.getTotal(es_res) es_res = settings.docStoreConn.getFields(es_res, flds) @@ -629,13 +611,10 @@ async def rebuild_graph(tenant_id, kb_id, exclude_rebuild=None): continue elif exclude_rebuild in d["source_id"]: continue - + next_graph = json_graph.node_link_graph(json.loads(d["content_with_weight"]), edges="edges") merged_graph = nx.compose(graph, next_graph) - merged_source = { - n: graph.nodes[n]["source_id"] + next_graph.nodes[n]["source_id"] - for n in graph.nodes & next_graph.nodes - } + merged_source = {n: graph.nodes[n]["source_id"] + next_graph.nodes[n]["source_id"] for n in graph.nodes & next_graph.nodes} nx.set_node_attributes(merged_graph, merged_source, "source_id") if "source_id" in graph.graph: merged_graph.graph["source_id"] = 
graph.graph["source_id"] + next_graph.graph["source_id"] diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 5575aa390..17a6ccea9 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -239,7 +239,7 @@ class Base(ABC): def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}): gen_conf = self._clean_conf(gen_conf) - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) ans = "" @@ -293,7 +293,7 @@ class Base(ABC): assert False, "Shouldn't be here." def chat(self, system, history, gen_conf={}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) @@ -324,7 +324,7 @@ class Base(ABC): def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}): gen_conf = self._clean_conf(gen_conf) tools = self.tools - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) total_tokens = 0 @@ -427,7 +427,7 @@ class Base(ABC): assert False, "Shouldn't be here." def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) ans = "" @@ -576,7 +576,7 @@ class BaiChuanChat(Base): return ans, self.total_token_count(response) def chat_streamly(self, system, history, gen_conf={}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) if "max_tokens" in gen_conf: del gen_conf["max_tokens"] @@ -641,7 +641,7 @@ class ZhipuChat(Base): return super().chat_with_tools(system, history, gen_conf) def chat_streamly(self, system, history, gen_conf={}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) if "max_tokens" in gen_conf: del gen_conf["max_tokens"] @@ -705,7 +705,7 @@ class LocalLLM(Base): def _prepare_prompt(self, system, history, gen_conf): from rag.svr.jina_server import Prompt - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) return Prompt(message=history, gen_conf=gen_conf) @@ -792,7 +792,7 @@ class MiniMaxChat(Base): return ans, self.total_token_count(response) def chat_streamly(self, system, history, gen_conf): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) for k in list(gen_conf.keys()): if k not in ["temperature", "top_p", "max_tokens"]: @@ -865,7 +865,7 @@ class MistralChat(Base): return ans, self.total_token_count(response) def chat_streamly(self, system, history, gen_conf={}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) for k in list(gen_conf.keys()): if k not in ["temperature", "top_p", "max_tokens"]: @@ -1089,7 +1089,7 @@ class HunyuanChat(Base): _gen_conf = {} _history = [{k.capitalize(): v for k, v in item.items()} for item in history] - if system: + if system and history and history[0].get("role") != "system": _history.insert(0, {"Role": "system", "Content": system}) if "max_tokens" in gen_conf: del gen_conf["max_tokens"] @@ -1565,7 
+1565,7 @@ class LiteLLMBase(ABC): def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}): gen_conf = self._clean_conf(gen_conf) - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) ans = "" @@ -1630,7 +1630,7 @@ class LiteLLMBase(ABC): assert False, "Shouldn't be here." def chat(self, system, history, gen_conf={}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) @@ -1662,7 +1662,7 @@ class LiteLLMBase(ABC): def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}): gen_conf = self._clean_conf(gen_conf) tools = self.tools - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) total_tokens = 0 @@ -1787,7 +1787,7 @@ class LiteLLMBase(ABC): assert False, "Shouldn't be here." def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs): - if system: + if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) ans = "" diff --git a/sandbox/executor_manager/services/execution.py b/sandbox/executor_manager/services/execution.py index 1371ee95f..eae366585 100644 --- a/sandbox/executor_manager/services/execution.py +++ b/sandbox/executor_manager/services/execution.py @@ -162,7 +162,7 @@ if (fs.existsSync(mainPath)) { elif language == SupportLanguage.NODEJS: run_args.extend([]) else: - assert True, "Will never reach here" + assert False, "Will never reach here" run_args.extend([runner_name, args_json]) returncode, stdout, stderr = await async_run_command(
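The chat_model.py hunks above all replace `if system:` with `if system and history and history[0].get("role") != "system":`, so a system prompt is only prepended when the history doesn't already start with one (and, per the same condition, is skipped when the history is empty). A standalone sketch of that guard, with the retry loop as an illustrative assumption:

```python
def prepend_system(system: str, history: list[dict]) -> list[dict]:
    """Insert the system prompt only if the history doesn't already carry one."""
    if system and history and history[0].get("role") != "system":
        history.insert(0, {"role": "system", "content": system})
    return history


history = [{"role": "user", "content": "Hello"}]
for _ in range(3):  # e.g. a retry loop reusing the same history list
    prepend_system("You are a helpful assistant.", history)
print(history)
# Only one system message: [{'role': 'system', ...}, {'role': 'user', 'content': 'Hello'}]
```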