From ecdb1701df3800639c6039f2c269237e2fcfe3a6 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Thu, 17 Jul 2025 16:48:50 +0800
Subject: [PATCH] Perf: test llm before RAPTOR. (#8897)

### What problem does this PR solve?

Probe the chat and embedding models before starting RAPTOR, reusing the
pressure test already run before GraphRAG, so that an unavailable or
erroring model fails fast instead of partway through a long task. Also
adds a timeout around per-figure vision LLM description.

### Type of change

- [x] Performance Improvement
---
 api/utils/api_utils.py          | 15 +++++++++++++++
 deepdoc/parser/figure_parser.py |  2 ++
 graphrag/general/index.py       | 13 ++-----------
 rag/svr/task_executor.py        |  4 +++-
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index 078aa7bf2..cf60ea7db 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -670,3 +670,18 @@ def timeout(
         return wrapper
 
     return decorator
+
+async def is_strong_enough(chat_model, embedding_model):
+
+    @timeout(30, 2)
+    async def _is_strong_enough():
+        nonlocal chat_model, embedding_model
+        _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
+        res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role":"user", "content": "Are you strong enough!?"}], {}))
+        if res.find("**ERROR**") >= 0:
+            raise Exception(res)
+
+    # Pressure test: run 12 probes against the models concurrently
+    async with trio.open_nursery() as nursery:
+        for _ in range(12):
+            nursery.start_soon(_is_strong_enough)
\ No newline at end of file
diff --git a/deepdoc/parser/figure_parser.py b/deepdoc/parser/figure_parser.py
index b29a4a8a5..0ec315876 100644
--- a/deepdoc/parser/figure_parser.py
+++ b/deepdoc/parser/figure_parser.py
@@ -17,6 +17,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from PIL import Image
 
+from api.utils.api_utils import timeout
 from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
 from rag.prompts import vision_llm_figure_describe_prompt
 
@@ -80,6 +81,7 @@ class VisionFigureParser:
     def __call__(self, **kwargs):
         callback = kwargs.get("callback", lambda prog, msg: None)
 
+        @timeout(30, 3)
        def process(figure_idx, figure_binary):
             description_text = picture_vision_llm_chunk(
                 binary=figure_binary,
diff --git a/graphrag/general/index.py b/graphrag/general/index.py
index bb51adcdb..42230f537 100644
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@@ -20,7 +20,7 @@ import trio
 
 from api import settings
 from api.utils import get_uuid
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
 from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt
 from graphrag.general.community_reports_extractor import CommunityReportsExtractor
@@ -39,13 +39,6 @@ from rag.nlp import rag_tokenizer, search
 from rag.utils.redis_conn import RedisDistributedLock
 
 
-@timeout(30, 2)
-async def _is_strong_enough(chat_model, embedding_model):
-    _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
-    res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role":"user", "content": "Are you strong enough!?"}], {}))
-    if res.find("**ERROR**") >= 0:
-        raise Exception(res)
-
 
 async def run_graphrag(
     row: dict,
@@ -57,9 +50,7 @@ async def run_graphrag(
     callback,
 ):
     # Pressure test for GraphRAG task
-    async with trio.open_nursery() as nursery:
-        for _ in range(12):
-            nursery.start_soon(_is_strong_enough, chat_model, embedding_model)
+    await is_strong_enough(chat_model, embedding_model)
     start = trio.current_time()
     tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
 
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index c39bb982e..837a5a8a4 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -21,7 +21,7 @@ import sys
 import threading
 import time
 
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from api.utils.log_utils import init_root_logger, get_project_base_directory
 from graphrag.general.index import run_graphrag
 from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
@@ -466,6 +466,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
 
 @timeout(3600)
 async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
+    # Pressure test for RAPTOR task
+    await is_strong_enough(chat_mdl, embd_mdl)
     chunks = []
     vctr_nm = "q_%d_vec"%vector_size
     for d in settings.retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])],