Optimized graphrag again (#5927)

### What problem does this PR solve?

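Optimized graphrag again. In the hunk shown below, the per-node PageRank updates written to the document store are started concurrently inside a trio nursery instead of being awaited one at a time in a loop.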

### Type of change

- [x] Performance Improvement
This commit is contained in:
Zhichang Yu
2025-03-11 18:36:10 +08:00
committed by GitHub
parent 45318e7575
commit 939e668096
4 changed files with 117 additions and 101 deletions

View File

@@ -489,15 +489,16 @@ async def update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, graph, n_hop):
return nbrs
pr = nx.pagerank(graph)
for n, p in pr.items():
graph.nodes[n]["pagerank"] = p
try:
await trio.to_thread.run_sync(lambda: settings.docStoreConn.update({"entity_kwd": n, "kb_id": kb_id},
{"rank_flt": p,
"n_hop_with_weight": json.dumps( (n), ensure_ascii=False)},
search.index_name(tenant_id), kb_id))
except Exception as e:
logging.exception(e)
try:
async with trio.open_nursery() as nursery:
for n, p in pr.items():
graph.nodes[n]["pagerank"] = p
nursery.start_soon(lambda: trio.to_thread.run_sync(lambda: settings.docStoreConn.update({"entity_kwd": n, "kb_id": kb_id},
{"rank_flt": p,
"n_hop_with_weight": json.dumps((n), ensure_ascii=False)},
search.index_name(tenant_id), kb_id)))
except Exception as e:
logging.exception(e)
ty2ents = defaultdict(list)
for p, r in sorted(pr.items(), key=lambda x: x[1], reverse=True):