Optimize graphrag again (#6513)

### What problem does this PR solve? Removed set_entity and set_relation to avoid accessing doc engine during graph computation. Introduced GraphChange to avoid writing unchanged chunks. ### Type of change - [x] Performance Improvement
2026-01-30 23:26:36 +08:00 · 2025-03-26 15:34:42 +08:00
parent 7a677cb095
commit 6bf26e2a81
19 changed files with 466 additions and 530 deletions
--- a/graphrag/light/graph_extractor.py
+++ b/graphrag/light/graph_extractor.py
@ -5,7 +5,7 @@ Reference:
 - [graphrag](https://github.com/microsoft/graphrag)
 """
 import re
-from typing import Any, Callable
+from typing import Any
 from dataclasses import dataclass
 from graphrag.general.extractor import Extractor, ENTITY_EXTRACTION_MAX_GLEANINGS
 from graphrag.light.graph_prompt import PROMPTS
@ -33,14 +33,10 @@ class GraphExtractor(Extractor):
        llm_invoker: CompletionLLM,
        language: str | None = "English",
        entity_types: list[str] | None = None,
-        get_entity: Callable | None = None,
-        set_entity: Callable | None = None,
-        get_relation: Callable | None = None,
-        set_relation: Callable | None = None,
        example_number: int = 2,
        max_gleanings: int | None = None,
    ):
-        super().__init__(llm_invoker, language, entity_types, get_entity, set_entity, get_relation, set_relation)
+        super().__init__(llm_invoker, language, entity_types)
        """Init method definition."""
        self._max_gleanings = (
            max_gleanings