From b4b6d296ea759884751a66f867eacd693082b4ec Mon Sep 17 00:00:00 2001
From: Liu An
Date: Wed, 23 Jul 2025 15:08:36 +0800
Subject: [PATCH] Fix: Increase timeouts for document parsing and model checks
 (#8996)

### What problem does this PR solve?

- Extended the embedding model check timeout from 3 to 10 seconds in api_utils.py
- Allowed more time for large file batches and concurrent parsing operations to prevent test flakiness
- Imported from #8940 - https://github.com/infiniflow/ragflow/actions/runs/16422052652

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
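For reviewers unfamiliar with the test helper: the timeouts raised below are the arguments to the `wait_for` decorator used by both test modules. Its implementation is not part of this diff, so the following is only a minimal sketch, assuming the `wait_for(timeout, interval, error_message)` signature seen in the tests, of how such a polling helper typically behaves:

```python
import functools
import time


def wait_for(timeout, interval, error_message):
    """Minimal sketch of a polling decorator; the real test helper may differ."""
    def decorator(condition):
        @functools.wraps(condition)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                # Re-check the condition (e.g. "all documents reached DONE") every `interval` seconds.
                if condition(*args, **kwargs):
                    return True
                time.sleep(interval)
            raise TimeoutError(error_message)
        return wrapper
    return decorator
```

Under this reading, raising the first argument from 100/120 to 200 lets large or concurrent parsing batches take up to 200 seconds (polled once per second) before the test is marked as failed.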
---
 api/utils/api_utils.py                              | 10 ++++------
 .../test_parse_documents.py                         |  4 ++--
 .../test_parse_documents.py                         |  4 ++--
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index ede7f3000..0bb0931cb 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -26,15 +26,12 @@ from copy import deepcopy
 from functools import wraps
 from hmac import HMAC
 from io import BytesIO
-from typing import Any, Optional, Union, Callable, Coroutine, Type
+from typing import Any, Callable, Coroutine, Optional, Type, Union
 from urllib.parse import quote, urlencode
 from uuid import uuid1
 
-import trio
-from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
-
-
 import requests
+import trio
 from flask import (
     Response,
     jsonify,
@@ -53,6 +50,7 @@ from api.constants import REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC
 from api.db.db_models import APIToken
 from api.db.services.llm_service import LLMService, TenantLLMService
 from api.utils import CustomJSONEncoder, get_uuid, json_dumps
+from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
 
 requests.models.complexjson.dumps = functools.partial(json.dumps, cls=CustomJSONEncoder)
 
@@ -693,7 +691,7 @@ async def is_strong_enough(chat_model, embedding_model):
     async def _is_strong_enough():
         nonlocal chat_model, embedding_model
         if embedding_model:
-            with trio.fail_after(3):
+            with trio.fail_after(10):
                 _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
         if chat_model:
             with trio.fail_after(30):
diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
index 04aa8cac2..fd31e5cee 100644
--- a/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
@@ -167,7 +167,7 @@ class TestDocumentsParse:
 
 @pytest.mark.p3
 def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
@@ -188,7 +188,7 @@ def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
 
 @pytest.mark.p3
 def test_concurrent_parse(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
index 2afc9928f..2b94d488e 100644
--- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
@@ -116,7 +116,7 @@ class TestDocumentsParse:
 
 @pytest.mark.p3
 def test_parse_100_files(add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
@@ -136,7 +136,7 @@ def test_parse_100_files(add_dataset_func, tmp_path):
 
 @pytest.mark.p3
 def test_concurrent_parse(add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
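Note on the api_utils.py change: it widens a `trio.fail_after` deadline. `fail_after` cancels the enclosed block and raises `trio.TooSlowError` once the given number of seconds has elapsed, so the embedding-model probe now gets 10 seconds instead of 3 before the check is treated as a failure. A small self-contained illustration of that behaviour (the `trio.sleep` call merely stands in for the blocking `encode` probe):

```python
import trio


async def main():
    try:
        # fail_after cancels the body and raises TooSlowError once the 10-second deadline passes.
        with trio.fail_after(10):
            await trio.sleep(3)  # stand-in for the embedding model health check
        print("model responded within the deadline")
    except trio.TooSlowError:
        print("model check timed out")


trio.run(main)
```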