From b4b6d296ea759884751a66f867eacd693082b4ec Mon Sep 17 00:00:00 2001
From: Liu An
Date: Wed, 23 Jul 2025 15:08:36 +0800
Subject: [PATCH] Fix: Increase timeouts for document parsing and model checks
 (#8996)

### What problem does this PR solve?

- Extended the embedding model check timeout from 3 to 10 seconds in api_utils.py
- Allowed more time for large file batches and concurrent parsing operations to prevent test flakiness
- Imported from #8940 - https://github.com/infiniflow/ragflow/actions/runs/16422052652

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
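For reviewers unfamiliar with the test helper: the timeouts raised below are the arguments to the `wait_for` decorator used by both test modules. Its implementation is not part of this diff, so the following is only a minimal sketch, assuming the `wait_for(timeout, interval, error_message)` signature seen in the tests, of how such a polling helper typically behaves:

```python
import functools
import time


def wait_for(timeout, interval, error_message):
    """Minimal sketch of a polling decorator; the real test helper may differ."""
    def decorator(condition):
        @functools.wraps(condition)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                # Re-check the condition (e.g. "all documents reached DONE") every `interval` seconds.
                if condition(*args, **kwargs):
                    return True
                time.sleep(interval)
            raise TimeoutError(error_message)
        return wrapper
    return decorator
```

Under this reading, raising the first argument from 100/120 to 200 lets large or concurrent parsing batches take up to 200 seconds (polled once per second) before the test is marked as failed.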
---
 api/utils/api_utils.py                              | 10 ++++------
 .../test_parse_documents.py                         |  4 ++--
 .../test_parse_documents.py                         |  4 ++--
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index ede7f3000..0bb0931cb 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -26,15 +26,12 @@ from copy import deepcopy
 from functools import wraps
 from hmac import HMAC
 from io import BytesIO
-from typing import Any, Optional, Union, Callable, Coroutine, Type
+from typing import Any, Callable, Coroutine, Optional, Type, Union
 from urllib.parse import quote, urlencode
 from uuid import uuid1
 
-import trio
-from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
-
-
 import requests
+import trio
 from flask import (
     Response,
     jsonify,
@@ -53,6 +50,7 @@ from api.constants import REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC
 from api.db.db_models import APIToken
 from api.db.services.llm_service import LLMService, TenantLLMService
 from api.utils import CustomJSONEncoder, get_uuid, json_dumps
+from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
 
 requests.models.complexjson.dumps = functools.partial(json.dumps, cls=CustomJSONEncoder)
 
@@ -693,7 +691,7 @@ async def is_strong_enough(chat_model, embedding_model):
     async def _is_strong_enough():
         nonlocal chat_model, embedding_model
         if embedding_model:
-            with trio.fail_after(3):
+            with trio.fail_after(10):
                 _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
         if chat_model:
             with trio.fail_after(30):
diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
index 04aa8cac2..fd31e5cee 100644
--- a/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py
@@ -167,7 +167,7 @@ class TestDocumentsParse:
 
 @pytest.mark.p3
 def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
@@ -188,7 +188,7 @@ def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
 
 @pytest.mark.p3
 def test_concurrent_parse(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
index 2afc9928f..2b94d488e 100644
--- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
+++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py
@@ -116,7 +116,7 @@ class TestDocumentsParse:
 
 @pytest.mark.p3
 def test_parse_100_files(add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
@@ -136,7 +136,7 @@ def test_parse_100_files(add_dataset_func, tmp_path):
 
 @pytest.mark.p3
 def test_concurrent_parse(add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
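Note on the api_utils.py change: it widens a `trio.fail_after` deadline. `fail_after` cancels the enclosed block and raises `trio.TooSlowError` once the given number of seconds has elapsed, so the embedding-model probe now gets 10 seconds instead of 3 before the check is treated as a failure. A small self-contained illustration of that behaviour (the `trio.sleep` call merely stands in for the blocking `encode` probe):

```python
import trio


async def main():
    try:
        # fail_after cancels the body and raises TooSlowError once the 10-second deadline passes.
        with trio.fail_after(10):
            await trio.sleep(3)  # stand-in for the embedding model health check
        print("model responded within the deadline")
    except trio.TooSlowError:
        print("model check timed out")


trio.run(main)
```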