From 6ab4c1a6e9008f4598e4fcfbbb83593c7d5fda08 Mon Sep 17 00:00:00 2001 From: Stephen Hu <812791840@qq.com> Date: Fri, 10 Oct 2025 11:03:40 +0800 Subject: [PATCH] Refactor: improve how NvidiaCV calculate res total token counts (#10455) ### What problem does this PR solve? Improve how NvidiaCV calculates the total token count of a response: the direct `response["usage"]["total_tokens"]` lookups are replaced with calls to `total_token_count_from_response(response)`. ### Type of change - [x] Refactoring --- rag/llm/cv_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index 7e763641a..55f01021a 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -614,7 +614,7 @@ class NvidiaCV(Base): response = response.json() return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response), ) def _request(self, msg, gen_conf={}): @@ -637,7 +637,7 @@ class NvidiaCV(Base): response = self._request(vision_prompt) return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response) ) def chat(self, system, history, gen_conf, images=[], **kwargs): @@ -645,7 +645,7 @@ class NvidiaCV(Base): response = self._request(self._form_history(system, history, images), gen_conf) return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response) ) except Exception as e: return "**ERROR**: " + str(e), 0 @@ -656,7 +656,7 @@ class NvidiaCV(Base): response = self._request(self._form_history(system, history, images), gen_conf) cnt = response["choices"][0]["message"]["content"] if "usage" in response and "total_tokens" in response["usage"]: - total_tokens += response["usage"]["total_tokens"] + total_tokens += total_token_count_from_response(response) for resp in cnt: yield resp except Exception as e: