From 9863862348302c428e4cc62e2239444798e41d36 Mon Sep 17 00:00:00 2001
From: N0bodycan <49983270+N0bodycan@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:14:30 +0800
Subject: [PATCH] fix: prevent redundant retries in async_chat_streamly upon
 success (#11832)

## What changes were proposed in this pull request?

Added a return statement after the successful completion of the async for
loop in async_chat_streamly.

## Why are the changes needed?

Previously, the code lacked a break/return mechanism inside the try block.
This caused the retry loop (for attempt in range...) to continue executing
even after the LLM response was successfully generated and yielded,
resulting in duplicate requests (up to max_retries times).

## Does this PR introduce any user-facing change?

No (it fixes an internal logic bug).

---
 rag/llm/chat_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index f3f207eb2..8a2743866 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -187,6 +187,9 @@ class Base(ABC):
                         ans = delta_ans
                     total_tokens += tol
                     yield ans
+
+                yield total_tokens
+                return
             except Exception as e:
                 e = await self._exceptions_async(e, attempt)
                 if e:
@@ -194,8 +197,6 @@ class Base(ABC):
                     yield total_tokens
                     return
 
-        yield total_tokens
-
     def _length_stop(self, ans):
         if is_chinese([ans]):
             return ans + LENGTH_NOTIFICATION_CN