diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index cdadf9476..cd3163f02 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -797,8 +797,7 @@ class NvidiaCV(Base):
         try:
             response = self._request(self._form_history(system, history, images), gen_conf)
             cnt = response["choices"][0]["message"]["content"]
-            if "usage" in response and "total_tokens" in response["usage"]:
-                total_tokens += total_token_count_from_response(response)
+            total_tokens += total_token_count_from_response(response)
             for resp in cnt:
                 yield resp
         except Exception as e:
@@ -847,7 +846,7 @@ class AnthropicCV(Base):
         prompt = self.prompt(b64, prompt if prompt else vision_llm_describe_prompt())
 
         response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=prompt)
-        return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
+        return response["content"][0]["text"].strip(), total_token_count_from_response(response)
 
     def _clean_conf(self, gen_conf):
         if "presence_penalty" in gen_conf:
@@ -874,7 +873,7 @@ class AnthropicCV(Base):
                 ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
             return (
                 ans,
-                response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
+                total_token_count_from_response(response),
             )
         except Exception as e:
             return ans + "\n**ERROR**: " + str(e), 0
diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py
index 1d43a5e59..16acfd98e 100644
--- a/rag/utils/__init__.py
+++ b/rag/utils/__init__.py
@@ -63,6 +63,12 @@ def total_token_count_from_response(resp):
             return resp["usage"]["total_tokens"]
         except Exception:
             pass
+
+    if "usage" in resp and "input_tokens" in resp["usage"] and "output_tokens" in resp["usage"]:
+        try:
+            return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
+        except Exception:
+            pass
     return 0
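
Note: the patch centralizes token accounting in total_token_count_from_response, so callers like NvidiaCV no longer need to guard for a missing "usage" key (the helper returns 0 in that case), and AnthropicCV's hand-rolled input_tokens + output_tokens sum moves into the helper as a fallback. Below is a minimal sanity-check sketch of the new behavior, assuming dict-shaped responses like those in the hunks above; the inputs are illustrative, not from the test suite, and any branches of the helper above line 63 are not visible in this diff.

# Sketch of the helper's behavior after this patch (hypothetical inputs).
# Assumes dict-shaped responses; branches of total_token_count_from_response
# that sit above line 63 are not shown in the diff and may handle other shapes.
from rag.utils import total_token_count_from_response

# OpenAI-style usage: "total_tokens" is present and returned directly.
assert total_token_count_from_response({"usage": {"total_tokens": 42}}) == 42

# Anthropic-style usage: no "total_tokens", so the new fallback branch
# sums input_tokens + output_tokens.
assert total_token_count_from_response({"usage": {"input_tokens": 30, "output_tokens": 12}}) == 42

# No usable usage info at all: falls through to the final return 0,
# which is why the NvidiaCV call site can drop its "usage" guard.
assert total_token_count_from_response({"choices": []}) == 0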