Optimize graphrag again (#6513)

### What problem does this PR solve?

Removed `set_entity` and `set_relation` to avoid accessing the doc engine during
graph computation.
Introduced `GraphChange` to avoid writing unchanged chunks.

### Type of change

- [x] Performance Improvement
This commit is contained in:
Zhichang Yu
2025-03-26 15:34:42 +08:00
committed by GitHub
parent 7a677cb095
commit 6bf26e2a81
19 changed files with 466 additions and 530 deletions

View File

@ -100,7 +100,8 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
logging.debug(
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
)
if not graph.nodes():
nodes = set(graph.nodes())
if not nodes:
return {}
node_id_to_community_map = _compute_leiden_communities(
@ -120,7 +121,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
result = {}
results_by_level[level] = result
for node_id, raw_community_id in node_id_to_community_map[level].items():
if node_id not in graph.nodes:
if node_id not in nodes:
logging.warning(f"Node {node_id} not found in the graph.")
continue
community_id = str(raw_community_id)