From ecdb1701df3800639c6039f2c269237e2fcfe3a6 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Thu, 17 Jul 2025 16:48:50 +0800
Subject: [PATCH] Perf: test llm before RAPTOR. (#8897)

### What problem does this PR solve?

Probe the chat and embedding models before starting RAPTOR, reusing the
pressure test already run before GraphRAG, so that an unavailable or
erroring model fails fast instead of partway through a long task. Also
adds a timeout around per-figure vision LLM description.

### Type of change

- [x] Performance Improvement
---
 api/utils/api_utils.py          | 15 +++++++++++++++
 deepdoc/parser/figure_parser.py |  2 ++
 graphrag/general/index.py       | 13 ++-----------
 rag/svr/task_executor.py        |  4 +++-
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index 078aa7bf2..cf60ea7db 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -670,3 +670,18 @@ def timeout(
         return wrapper
 
     return decorator
+
+async def is_strong_enough(chat_model, embedding_model):
+
+    @timeout(30, 2)
+    async def _is_strong_enough():
+        nonlocal chat_model, embedding_model
+        _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
+        res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role":"user", "content": "Are you strong enough!?"}], {}))
+        if res.find("**ERROR**") >= 0:
+            raise Exception(res)
+
+    # Pressure test: run 12 probes against the models concurrently
+    async with trio.open_nursery() as nursery:
+        for _ in range(12):
+            nursery.start_soon(_is_strong_enough)
\ No newline at end of file
diff --git a/deepdoc/parser/figure_parser.py b/deepdoc/parser/figure_parser.py
index b29a4a8a5..0ec315876 100644
--- a/deepdoc/parser/figure_parser.py
+++ b/deepdoc/parser/figure_parser.py
@@ -17,6 +17,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from PIL import Image
 
+from api.utils.api_utils import timeout
 from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
 from rag.prompts import vision_llm_figure_describe_prompt
 
@@ -80,6 +81,7 @@ class VisionFigureParser:
     def __call__(self, **kwargs):
         callback = kwargs.get("callback", lambda prog, msg: None)
 
+        @timeout(30, 3)
        def process(figure_idx, figure_binary):
             description_text = picture_vision_llm_chunk(
                 binary=figure_binary,
diff --git a/graphrag/general/index.py b/graphrag/general/index.py
index bb51adcdb..42230f537 100644
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@@ -20,7 +20,7 @@ import trio
 
 from api import settings
 from api.utils import get_uuid
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
 from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt
 from graphrag.general.community_reports_extractor import CommunityReportsExtractor
@@ -39,13 +39,6 @@ from rag.nlp import rag_tokenizer, search
 from rag.utils.redis_conn import RedisDistributedLock
 
 
-@timeout(30, 2)
-async def _is_strong_enough(chat_model, embedding_model):
-    _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
-    res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role":"user", "content": "Are you strong enough!?"}], {}))
-    if res.find("**ERROR**") >= 0:
-        raise Exception(res)
-
 
 async def run_graphrag(
     row: dict,
@@ -57,9 +50,7 @@ async def run_graphrag(
     callback,
 ):
     # Pressure test for GraphRAG task
-    async with trio.open_nursery() as nursery:
-        for _ in range(12):
-            nursery.start_soon(_is_strong_enough, chat_model, embedding_model)
+    await is_strong_enough(chat_model, embedding_model)
     start = trio.current_time()
     tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
 
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index c39bb982e..837a5a8a4 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -21,7 +21,7 @@ import sys
 import threading
 import time
 
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from api.utils.log_utils import init_root_logger, get_project_base_directory
 from graphrag.general.index import run_graphrag
 from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
@@ -466,6 +466,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
 
 @timeout(3600)
 async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
+    # Pressure test for RAPTOR task
+    await is_strong_enough(chat_mdl, embd_mdl)
     chunks = []
     vctr_nm = "q_%d_vec"%vector_size
     for d in settings.retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])],