Perf: set timeout of some steps in KG. (#8873)

### What problem does this PR solve?

### Type of change


- [x] Performance Improvement
This commit is contained in:
Kevin Hu
2025-07-16 18:06:03 +08:00
committed by GitHub
parent b3018a455f
commit fbd115773b
9 changed files with 28 additions and 29 deletions

View File

@@ -124,7 +124,7 @@ async def run_graphrag(
return
-@timeout(60*60*2)
+@timeout(60*60, 1)
async def generate_subgraph(
extractor: Extractor,
tenant_id: str,
@@ -229,7 +229,7 @@ async def merge_subgraph(
return new_graph
-@timeout(60*60)
+@timeout(60*30, 1)
async def resolve_entities(
graph,
subgraph_nodes: set[str],
@@ -255,7 +255,7 @@ async def resolve_entities(
callback(msg=f"Graph resolution done in {now - start:.2f}s.")
-@timeout(60*30)
+@timeout(60*30, 1)
async def extract_community(
graph,
tenant_id: str,

View File

@@ -17,13 +17,12 @@ from typing import Any, Callable
import os
import trio
from typing import Set, Tuple
import networkx as nx
import numpy as np
import xxhash
from networkx.readwrite import json_graph
import dataclasses
from api.utils.api_utils import timeout
from api import settings
from api.utils import get_uuid
from rag.nlp import search, rag_tokenizer
@@ -305,6 +304,7 @@ def chunk_id(chunk):
return xxhash.xxh64((chunk["content_with_weight"] + chunk["kb_id"]).encode("utf-8")).hexdigest()
+@timeout(1, 3)
async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
chunk = {
"id": get_uuid(),
@@ -357,6 +357,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):
return res
+@timeout(1, 3)
async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks):
chunk = {
"id": get_uuid(),