From 9a8ce9d3e2d0b7ca94176cd341694425222691db Mon Sep 17 00:00:00 2001
From: dzikus
Date: Mon, 1 Dec 2025 04:26:34 +0100
Subject: [PATCH] fix: increase Quart RESPONSE_TIMEOUT and BODY_TIMEOUT for slow LLM responses (#11612)

### What problem does this PR solve?

The Quart framework has a default RESPONSE_TIMEOUT and BODY_TIMEOUT of 60 seconds. This causes the frontend chat to hang after exactly 60 seconds when using slow LLM backends (e.g., Ollama on CPU, or remote APIs with high latency).

This fix adds configurable timeout settings via environment variables with sensible defaults (600 seconds = 10 minutes) to match other timeout configurations in RAGFlow.

Fixes issues with chat timeout when:
- Using local Ollama on CPU (response time ~2 minutes)
- Using remote LLM APIs with high latency
- Processing complex RAG queries with many chunks

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Grzegorz Sterniczuk
---
 api/apps/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/api/apps/__init__.py b/api/apps/__init__.py
index a6e33c13b..e034f460b 100644
--- a/api/apps/__init__.py
+++ b/api/apps/__init__.py
@@ -82,6 +82,11 @@ app.url_map.strict_slashes = False
 app.json_encoder = CustomJSONEncoder
 app.errorhandler(Exception)(server_error_response)
 
+# Configure Quart timeouts for slow LLM responses (e.g., local Ollama on CPU)
+# Default Quart timeouts are 60 seconds which is too short for many LLM backends
+app.config["RESPONSE_TIMEOUT"] = int(os.environ.get("QUART_RESPONSE_TIMEOUT", 600))
+app.config["BODY_TIMEOUT"] = int(os.environ.get("QUART_BODY_TIMEOUT", 600))
+
 ## convince for dev and debug
 # app.config["LOGIN_DISABLED"] = True
 app.config["SESSION_PERMANENT"] = False