From 9a8ce9d3e2d0b7ca94176cd341694425222691db Mon Sep 17 00:00:00 2001
From: dzikus <grzegorz@sterniczuk.eu>
Date: Mon, 1 Dec 2025 04:26:34 +0100
Subject: [PATCH] fix: increase Quart RESPONSE_TIMEOUT and BODY_TIMEOUT for
 slow LLM responses (#11612)

### What problem does this PR solve?

The Quart framework has a default RESPONSE_TIMEOUT and BODY_TIMEOUT of
60 seconds. This causes the frontend chat to hang after exactly 60
seconds when using slow LLM backends (e.g., Ollama on CPU, or remote
APIs with high latency).

This fix makes both timeouts configurable via the environment variables
QUART_RESPONSE_TIMEOUT and QUART_BODY_TIMEOUT, with sensible defaults
(600 seconds = 10 minutes) to match other timeout configurations in
RAGFlow.

Fixes issues with chat timeout when:
- Using local Ollama on CPU (response time ~2 minutes)
- Using remote LLM APIs with high latency
- Processing complex RAG queries with many chunks

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Grzegorz Sterniczuk <grzegorz@sternicz.uk>
---
 api/apps/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/api/apps/__init__.py b/api/apps/__init__.py
index a6e33c13b..e034f460b 100644
--- a/api/apps/__init__.py
+++ b/api/apps/__init__.py
@@ -82,6 +82,11 @@ app.url_map.strict_slashes = False
 app.json_encoder = CustomJSONEncoder
 app.errorhandler(Exception)(server_error_response)
 
+# Configure Quart timeouts for slow LLM responses (e.g., local Ollama on CPU)
+# Quart's default timeouts are 60 seconds, which is too short for many LLM backends
+app.config["RESPONSE_TIMEOUT"] = int(os.environ.get("QUART_RESPONSE_TIMEOUT", 600))
+app.config["BODY_TIMEOUT"] = int(os.environ.get("QUART_BODY_TIMEOUT", 600))
+
 ## convince for dev and debug
 # app.config["LOGIN_DISABLED"] = True
 app.config["SESSION_PERMANENT"] = False