Refa: add more logs to KG. (#8889)

### What problem does this PR solve?


### Type of change

- [x] Refactoring
This commit is contained in:
Kevin Hu
2025-07-17 14:43:08 +08:00
committed by GitHub
parent a422367a40
commit 729e6098f9
3 changed files with 35 additions and 9 deletions

View File

@ -484,16 +484,20 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
semaphore = trio.Semaphore(5)
async with trio.open_nursery() as nursery:
for node in change.added_updated_nodes:
for ii, node in enumerate(change.added_updated_nodes):
node_attrs = graph.nodes[node]
async with semaphore:
if ii%100 == 9 and callback:
callback(msg=f"Get embedding of nodes: {ii}/{len(change.added_updated_nodes)}")
nursery.start_soon(graph_node_to_chunk, kb_id, embd_mdl, node, node_attrs, chunks)
for from_node, to_node in change.added_updated_edges:
for ii, (from_node, to_node) in enumerate(change.added_updated_edges):
edge_attrs = graph.get_edge_data(from_node, to_node)
if not edge_attrs:
# added_updated_edges could record a non-existing edge if both from_node and to_node participate in nodes merging.
continue
async with semaphore:
if ii%100 == 9 and callback:
callback(msg=f"Get embedding of edges: {ii}/{len(change.added_updated_edges)}")
nursery.start_soon(graph_edge_to_chunk, kb_id, embd_mdl, from_node, to_node, edge_attrs, chunks)
now = trio.current_time()
if callback:
@ -502,6 +506,9 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
es_bulk_size = 4
for b in range(0, len(chunks), es_bulk_size):
async with semaphore:
if b % 100 == es_bulk_size and callback:
callback(msg=f"Insert chunks: {b}/{len(chunks)}")
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(tenant_id), kb_id))
if doc_store_result:
error_message = f"Insert chunk error: {doc_store_result}, please check log file and Elasticsearch/Infinity status!"