From 9863862348302c428e4cc62e2239444798e41d36 Mon Sep 17 00:00:00 2001
From: N0bodycan <49983270+N0bodycan@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:14:30 +0800
Subject: [PATCH] fix: prevent redundant retries in async_chat_streamly upon
 success (#11832)

## What changes were proposed in this pull request?

Added a return statement after the successful completion of the async for
loop in async_chat_streamly.

## Why are the changes needed?

Previously, the code lacked a break/return mechanism inside the try block.
This caused the retry loop (for attempt in range...) to continue executing
even after the LLM response was successfully generated and yielded,
resulting in duplicate requests (up to max_retries times).

## Does this PR introduce any user-facing change?

No (it fixes an internal logic bug).

---
 rag/llm/chat_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index f3f207eb2..8a2743866 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -187,6 +187,9 @@ class Base(ABC):
                         ans = delta_ans
                     total_tokens += tol
                     yield ans
+
+                yield total_tokens
+                return
             except Exception as e:
                 e = await self._exceptions_async(e, attempt)
                 if e:
@@ -194,8 +197,6 @@ class Base(ABC):
                     yield total_tokens
                     return
 
-        yield total_tokens
-
     def _length_stop(self, ans):
         if is_chinese([ans]):
             return ans + LENGTH_NOTIFICATION_CN