EntityResolution batch. Close #6570 (#6602)

### What problem does this PR solve?

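EntityResolution batch: `run_graphrag` now returns early when no subgraph was extracted, and otherwise passes the node set of the newly extracted subgraph (`subgraph_nodes`) through `resolve_entities` to `EntityResolution`. Closes #6570.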

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Zhichang Yu
2025-03-27 16:40:36 +08:00
committed by GitHub
parent d2043ff9f2
commit 36b62e0fab
2 changed files with 28 additions and 20 deletions

View File

@ -69,26 +69,27 @@ async def run_graphrag(
embedding_model,
callback,
)
new_graph = None
if subgraph:
new_graph = await merge_subgraph(
tenant_id,
kb_id,
doc_id,
subgraph,
embedding_model,
callback,
)
if not subgraph:
return
subgraph_nodes = set(subgraph.nodes())
new_graph = await merge_subgraph(
tenant_id,
kb_id,
doc_id,
subgraph,
embedding_model,
callback,
)
assert new_graph is not None
if not with_resolution or not with_community:
return
if new_graph is None:
new_graph = await get_graph(tenant_id, kb_id)
if with_resolution and new_graph is not None:
if with_resolution:
await resolve_entities(
new_graph,
subgraph_nodes,
tenant_id,
kb_id,
doc_id,
@ -96,7 +97,7 @@ async def run_graphrag(
embedding_model,
callback,
)
if with_community and new_graph is not None:
if with_community:
await extract_community(
new_graph,
tenant_id,
@ -223,6 +224,7 @@ async def merge_subgraph(
async def resolve_entities(
graph,
subgraph_nodes: set[str],
tenant_id: str,
kb_id: str,
doc_id: str,
@ -241,7 +243,7 @@ async def resolve_entities(
er = EntityResolution(
llm_bdl,
)
reso = await er(graph, callback=callback)
reso = await er(graph, subgraph_nodes, callback=callback)
graph = reso.graph
change = reso.change
callback(msg=f"Graph resolution removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges.")