Fix: metadata issue & speed up GraphRAG. (#12113)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-02-08 03:25:06 +08:00 · 2025-12-23 15:57:27 +08:00
parent 063b06494a
commit 00bb6fbd28
6 changed files with 6 additions and 11 deletions
--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -71,18 +71,17 @@ class Extractor:
        _, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.92))
        response = ""
        for attempt in range(3):
-
            if task_id:
                if has_canceled(task_id):
                    logging.info(f"Task {task_id} cancelled during entity resolution candidate processing.")
                    raise TaskCanceledException(f"Task {task_id} was cancelled")
-
            try:
                response = asyncio.run(self._llm.async_chat(system_msg[0]["content"], hist, conf))
                response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL)
                if response.find("**ERROR**") >= 0:
                    raise Exception(response)
                set_llm_cache(self._llm.llm_name, system, response, history, gen_conf)
+                break
            except Exception as e:
                logging.exception(e)
                if attempt == 2:
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@@ -198,7 +198,7 @@ async def run_graphrag_for_kb(

        for d in raw_chunks:
            content = d["content_with_weight"]
-            if num_tokens_from_string(current_chunk + content) < 1024:
+            if num_tokens_from_string(current_chunk + content) < 4096:
                current_chunk += content
            else:
                if current_chunk:
--- a/graphrag/light/graph_extractor.py
+++ b/graphrag/light/graph_extractor.py
@@ -78,10 +78,6 @@ class GraphExtractor(Extractor):
        hint_prompt = self._entity_extract_prompt.format(**self._context_base, input_text=content)

        gen_conf = {}
-        final_result = ""
-        glean_result = ""
-        if_loop_result = ""
-        history = []
        logging.info(f"Start processing for {chunk_key}: {content[:25]}...")
        if self.callback:
            self.callback(msg=f"Start processing for {chunk_key}: {content[:25]}...")