diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index 7e763641a..55f01021a 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -614,7 +614,7 @@ class NvidiaCV(Base): response = response.json() return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response), ) def _request(self, msg, gen_conf={}): @@ -637,7 +637,7 @@ class NvidiaCV(Base): response = self._request(vision_prompt) return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response) ) def chat(self, system, history, gen_conf, images=[], **kwargs): @@ -645,7 +645,7 @@ class NvidiaCV(Base): response = self._request(self._form_history(system, history, images), gen_conf) return ( response["choices"][0]["message"]["content"].strip(), - response["usage"]["total_tokens"], + total_token_count_from_response(response) ) except Exception as e: return "**ERROR**: " + str(e), 0 @@ -656,7 +656,7 @@ class NvidiaCV(Base): response = self._request(self._form_history(system, history, images), gen_conf) cnt = response["choices"][0]["message"]["content"] if "usage" in response and "total_tokens" in response["usage"]: - total_tokens += response["usage"]["total_tokens"] + total_tokens += total_token_count_from_response(response) for resp in cnt: yield resp except Exception as e: