From 91d6fb8061dc838d9c1d3b91103e171e7cbbec8d Mon Sep 17 00:00:00 2001 From: Yuhao Bi Date: Fri, 5 Sep 2025 19:17:21 +0800 Subject: [PATCH] Fix miscalculated token count (#9776) ### What problem does this PR solve? The total token count was incorrectly accumulated when using the OpenAI-API-Compatible API: the provider's streamed `usage.total_tokens` is already cumulative, so adding it on each chunk double-counted tokens; it should be assigned, not added. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/llm/chat_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 98d71141c..adc4a3c5a 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -374,7 +374,7 @@ class Base(ABC): if not tol: total_tokens += num_tokens_from_string(resp.choices[0].delta.content) else: - total_tokens += tol + total_tokens = tol finish_reason = resp.choices[0].finish_reason if hasattr(resp.choices[0], "finish_reason") else "" if finish_reason == "length": @@ -410,7 +410,7 @@ class Base(ABC): if not tol: total_tokens += num_tokens_from_string(resp.choices[0].delta.content) else: - total_tokens += tol + total_tokens = tol answer += resp.choices[0].delta.content yield resp.choices[0].delta.content