Rebuild graph when it's out of date. (#4607)
### What problem does this PR solve?

#4543

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring
```diff
@@ -251,11 +251,11 @@ class KGSearch(Dealer):
                 break
         if ents:
-            ents = "\n-Entities-\n{}".format(pd.DataFrame(ents).to_csv())
+            ents = "\n---- Entities ----\n{}".format(pd.DataFrame(ents).to_csv())
         else:
             ents = ""
         if relas:
-            relas = "\n-Relations-\n{}".format(pd.DataFrame(relas).to_csv())
+            relas = "\n---- Relations ----\n{}".format(pd.DataFrame(relas).to_csv())
         else:
             relas = ""
@@ -296,7 +296,7 @@ class KGSearch(Dealer):
         if not txts:
             return ""
-        return "\n-Community Report-\n" + "\n".join(txts)
+        return "\n---- Community Report ----\n" + "\n".join(txts)


 if __name__ == "__main__":
```
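For a sense of what the new delimiters produce, here is a minimal, self-contained sketch (not part of the diff; the sample entity rows are made up) of the `---- Entities ----` section built with `pandas`, mirroring the changed format strings:

```python
# Minimal sketch with made-up sample rows, mirroring the new section format.
import pandas as pd

ents = [
    {"entity": "RAGFlow", "entity_type": "organization"},     # hypothetical row
    {"entity": "knowledge graph", "entity_type": "concept"},  # hypothetical row
]
section = "\n---- Entities ----\n{}".format(pd.DataFrame(ents).to_csv())
print(section)
```

The wider `---- ... ----` markers presumably make the entity, relation, and community-report sections easier to tell apart in the assembled prompt.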
```diff
@@ -23,6 +23,7 @@ from networkx.readwrite import json_graph

 from api import settings
 from rag.nlp import search, rag_tokenizer
+from rag.utils.doc_store_conn import OrderByExpr
 from rag.utils.redis_conn import REDIS_CONN

 ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
@@ -363,7 +364,7 @@ def get_graph(tenant_id, kb_id):
                 res.field[id]["source_id"]
         except Exception:
             continue
-    return None, None
+    return rebuild_graph(tenant_id, kb_id)


 def set_graph(tenant_id, kb_id, graph, docids):
```
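Putting the `get_graph` change in context: when the cached whole-graph chunk can no longer supply its `source_id`, the function now falls back to rebuilding instead of returning nothing. Below is a hedged sketch of that flow, reusing the `docStoreConn.search`/`getFields` call pattern and the module imports (`settings`, `OrderByExpr`, `search`) from the hunks above; the `content_with_weight` field and the `"graph"` chunk filter are assumptions for illustration, not the repository's exact `get_graph`:

```python
# Hedged sketch of the fallback flow; field names ("content_with_weight") and the
# "graph" chunk filter are assumptions, not copied from the repository.
import json
from networkx.readwrite import json_graph

def get_graph_sketch(tenant_id, kb_id):
    flds = ["content_with_weight", "source_id"]
    res = settings.docStoreConn.search(flds, [],
                                       {"kb_id": kb_id, "knowledge_graph_kwd": ["graph"]},
                                       [], OrderByExpr(), 0, 1,
                                       search.index_name(tenant_id), [kb_id])
    for _, d in settings.docStoreConn.getFields(res, flds).items():
        try:
            # Cached whole-graph chunk still parses and carries its source ids: use it.
            return json_graph.node_link_graph(json.loads(d["content_with_weight"])), d["source_id"]
        except Exception:
            continue
    # Previously this path returned (None, None); the fix rebuilds from entity/relation chunks.
    return rebuild_graph(tenant_id, kb_id)
```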
```diff
@@ -517,3 +518,36 @@ def flat_uniq_list(arr, key):
             res.append(a)
     return list(set(res))
+
+
+def rebuild_graph(tenant_id, kb_id):
+    graph = nx.Graph()
+    src_ids = []
+    flds = ["entity_kwd", "entity_type_kwd", "from_entity_kwd", "to_entity_kwd", "weight_int", "knowledge_graph_kwd", "source_id"]
+    bs = 256
+    for i in range(0, 10000000, bs):
+        es_res = settings.docStoreConn.search(flds, [],
+                                              {"kb_id": kb_id, "knowledge_graph_kwd": ["entity", "relation"]},
+                                              [],
+                                              OrderByExpr(),
+                                              i, bs, search.index_name(tenant_id), [kb_id]
+                                              )
+        tot = settings.docStoreConn.getTotal(es_res)
+        if tot == 0:
+            return None, None
+
+        es_res = settings.docStoreConn.getFields(es_res, flds)
+        for id, d in es_res.items():
+            src_ids.extend(d.get("source_id", []))
+            if d["knowledge_graph_kwd"] == "entity":
+                graph.add_node(d["entity_kwd"], entity_type=d["entity_type_kwd"])
+            else:
+                graph.add_edge(
+                    d["from_entity_kwd"],
+                    d["to_entity_kwd"],
+                    weight=int(d["weight_int"])
+                )
+
+        if len(es_res.keys()) < 128:
+            return graph, list(set(src_ids))
+
+    return graph, list(set(src_ids))
```
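To close the loop, a short hedged usage sketch: rebuild the graph for a knowledge base and persist it again via `set_graph`, whose signature appears in the hunk above; the tenant and kb ids are placeholders:

```python
# Hedged usage sketch; "tenant_0" / "kb_0" are placeholder ids.
graph, src_ids = rebuild_graph("tenant_0", "kb_0")
if graph is None:
    print("No entity or relation chunks found; nothing to rebuild.")
else:
    print(f"Rebuilt graph: {graph.number_of_nodes()} nodes, "
          f"{graph.number_of_edges()} edges, {len(src_ids)} source docs.")
    set_graph("tenant_0", "kb_0", graph, src_ids)
```

Note that `rebuild_graph` pages through the doc store 256 hits at a time, stops early once a page comes back with fewer than 128 results, and deduplicates the collected `source_id` values before returning.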