From 00bb6fbd28c2780e0c53f788b1dfd4f48e0a894c Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Tue, 23 Dec 2025 15:57:27 +0800
Subject: [PATCH] Fix: metadata issue & graphrag speeding up. (#12113)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Liu An
---
 api/db/services/dialog_service.py     | 4 ++--
 api/db/services/tenant_llm_service.py | 2 +-
 graphrag/general/extractor.py         | 3 +--
 graphrag/general/index.py             | 2 +-
 graphrag/light/graph_extractor.py     | 4 ----
 rag/svr/task_executor.py              | 2 +-
 6 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index e956b0a5b..4bc24210b 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -406,7 +406,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
             dialog.vector_similarity_weight,
             doc_ids=attachments,
             top=dialog.top_k,
-            aggs=False,
+            aggs=True,
             rerank_mdl=rerank_mdl,
             rank_feature=label_question(" ".join(questions), kbs),
         )
@@ -769,7 +769,7 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
         vector_similarity_weight=search_config.get("vector_similarity_weight", 0.3),
         top=search_config.get("top_k", 1024),
         doc_ids=doc_ids,
-        aggs=False,
+        aggs=True,
         rerank_mdl=rerank_mdl,
         rank_feature=label_question(question, kbs)
     )
diff --git a/api/db/services/tenant_llm_service.py b/api/db/services/tenant_llm_service.py
index 88689fdab..65771f60f 100644
--- a/api/db/services/tenant_llm_service.py
+++ b/api/db/services/tenant_llm_service.py
@@ -97,7 +97,7 @@ class TenantLLMService(CommonService):
         if llm_type == LLMType.EMBEDDING.value:
             mdlnm = tenant.embd_id if not llm_name else llm_name
         elif llm_type == LLMType.SPEECH2TEXT.value:
-            mdlnm = tenant.asr_id
+            mdlnm = tenant.asr_id if not llm_name else llm_name
         elif llm_type == LLMType.IMAGE2TEXT.value:
             mdlnm = tenant.img2txt_id if not llm_name else llm_name
         elif llm_type == LLMType.CHAT.value:
diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py
index a965a30c4..9164b4e27 100644
--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -71,18 +71,17 @@ class Extractor:
         _, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.92))
         response = ""
         for attempt in range(3):
-
             if task_id:
                 if has_canceled(task_id):
                     logging.info(f"Task {task_id} cancelled during entity resolution candidate processing.")
                     raise TaskCanceledException(f"Task {task_id} was cancelled")
-
             try:
                 response = asyncio.run(self._llm.async_chat(system_msg[0]["content"], hist, conf))
                 response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL)
                 if response.find("**ERROR**") >= 0:
                     raise Exception(response)
                 set_llm_cache(self._llm.llm_name, system, response, history, gen_conf)
+                break
             except Exception as e:
                 logging.exception(e)
                 if attempt == 2:
diff --git a/graphrag/general/index.py b/graphrag/general/index.py
index 1bc9790d9..ea5d73325 100644
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@@ -198,7 +198,7 @@ async def run_graphrag_for_kb(
 
     for d in raw_chunks:
         content = d["content_with_weight"]
-        if num_tokens_from_string(current_chunk + content) < 1024:
+        if num_tokens_from_string(current_chunk + content) < 4096:
             current_chunk += content
         else:
             if current_chunk:
diff --git a/graphrag/light/graph_extractor.py b/graphrag/light/graph_extractor.py
index f507f4617..569cf7ed3 100644
--- a/graphrag/light/graph_extractor.py
+++ b/graphrag/light/graph_extractor.py
@@ -78,10 +78,6 @@ class GraphExtractor(Extractor):
         hint_prompt = self._entity_extract_prompt.format(**self._context_base, input_text=content)
         gen_conf = {}
-        final_result = ""
-        glean_result = ""
-        if_loop_result = ""
-        history = []
         logging.info(f"Start processing for {chunk_key}: {content[:25]}...")
         if self.callback:
             self.callback(msg=f"Start processing for {chunk_key}: {content[:25]}...")
 
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index b36b6dd53..68817a66b 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -380,7 +380,7 @@ async def build_chunks(task, progress_callback):
                 cached = await gen_metadata(chat_mdl,
                                             metadata_schema(task["parser_config"]["metadata"]),
                                             d["content_with_weight"])
-                set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata")
+                set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", {})
             if cached:
                 d["metadata_obj"] = cached
     tasks = []
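
On the metadata fix: the old four-argument `set_llm_cache` call in `rag/svr/task_executor.py` omitted the trailing generation-config argument that the call in `graphrag/general/extractor.py` passes. A sketch of the presumed signature, inferred only from the two call sites visible in this patch (an assumption, not the library's documented API):

```python
def set_llm_cache(llm_name, query, response, history, gen_conf):
    # Presumed shape, inferred from the call sites in this patch: cache
    # `response` under a key derived from the other four arguments. With
    # this arity, the old four-argument call would raise a TypeError for
    # the missing `gen_conf`, which is what broke metadata generation.
    ...
```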
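On the speed-up: the `break` added in `graphrag/general/extractor.py` matters because, without it, a successful attempt fell through to the next loop iteration and re-issued the same LLM request, so every prompt was sent up to three times. A minimal sketch of the corrected retry pattern, with a hypothetical `call_llm()` standing in for the real `async_chat` call:

```python
import logging

def chat_with_retry(call_llm, max_attempts=3):
    """Retry an LLM call up to max_attempts times, stopping on first success."""
    response = ""
    for attempt in range(max_attempts):
        try:
            response = call_llm()
            if "**ERROR**" in response:
                raise Exception(response)
            break  # success: without this, the loop re-ran the call needlessly
        except Exception as e:
            logging.exception(e)
            if attempt == max_attempts - 1:
                raise
    return response
```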
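The 1024 → 4096 change in `graphrag/general/index.py` raises the token budget used to pack raw chunks into batches before graph extraction. A rough sketch of that accumulation loop, with a hypothetical `count_tokens()` in place of `num_tokens_from_string`:

```python
def batch_chunks(contents, count_tokens, budget=4096):
    """Greedily pack chunk texts into batches of at most `budget` tokens."""
    batches, current = [], ""
    for content in contents:
        if count_tokens(current + content) < budget:
            current += content  # still under budget: keep accumulating
        else:
            if current:
                batches.append(current)  # flush the filled batch
            current = content  # start the next batch with this chunk
    if current:
        batches.append(current)  # flush the remainder
    return batches
```

Larger batches mean fewer extraction prompts per knowledge base, which, together with the retry `break`, accounts for the "graphrag speeding up" half of the title.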