Rebuild graph when it's out of time. (#4607)

### What problem does this PR solve?

#4543

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring
This commit is contained in:
Kevin Hu
2025-01-23 17:26:20 +08:00
committed by GitHub
parent bbc1d02c96
commit 86892959a0
5 changed files with 55 additions and 10 deletions

View File

@ -251,11 +251,11 @@ class KGSearch(Dealer):
break
if ents:
ents = "\n-Entities-\n{}".format(pd.DataFrame(ents).to_csv())
ents = "\n---- Entities ----\n{}".format(pd.DataFrame(ents).to_csv())
else:
ents = ""
if relas:
relas = "\n-Relations-\n{}".format(pd.DataFrame(relas).to_csv())
relas = "\n---- Relations ----\n{}".format(pd.DataFrame(relas).to_csv())
else:
relas = ""
@ -296,7 +296,7 @@ class KGSearch(Dealer):
if not txts:
return ""
return "\n-Community Report-\n" + "\n".join(txts)
return "\n---- Community Report ----\n" + "\n".join(txts)
if __name__ == "__main__":

View File

@ -23,6 +23,7 @@ from networkx.readwrite import json_graph
from api import settings
from rag.nlp import search, rag_tokenizer
from rag.utils.doc_store_conn import OrderByExpr
from rag.utils.redis_conn import REDIS_CONN
ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
@ -363,7 +364,7 @@ def get_graph(tenant_id, kb_id):
res.field[id]["source_id"]
except Exception:
continue
return None, None
return rebuild_graph(tenant_id, kb_id)
def set_graph(tenant_id, kb_id, graph, docids):
@ -517,3 +518,36 @@ def flat_uniq_list(arr, key):
res.append(a)
return list(set(res))
def rebuild_graph(tenant_id, kb_id):
graph = nx.Graph()
src_ids = []
flds = ["entity_kwd", "entity_type_kwd", "from_entity_kwd", "to_entity_kwd", "weight_int", "knowledge_graph_kwd", "source_id"]
bs = 256
for i in range(0, 10000000, bs):
es_res = settings.docStoreConn.search(flds, [],
{"kb_id": kb_id, "knowledge_graph_kwd": ["entity", "relation"]},
[],
OrderByExpr(),
i, bs, search.index_name(tenant_id), [kb_id]
)
tot = settings.docStoreConn.getTotal(es_res)
if tot == 0:
return None, None
es_res = settings.docStoreConn.getFields(es_res, flds)
for id, d in es_res.items():
src_ids.extend(d.get("source_id", []))
if d["knowledge_graph_kwd"] == "entity":
graph.add_node(d["entity_kwd"], entity_type=d["entity_type_kwd"])
else:
graph.add_edge(
d["from_entity_kwd"],
d["to_entity_kwd"],
weight=int(d["weight_int"])
)
if len(es_res.keys()) < 128:
return graph, list(set(src_ids))
return graph, list(set(src_ids))