Mirror of https://github.com/infiniflow/ragflow.git
Fix: Disable reasoning on Gemini 2.5 Flash by default (#10477)
### What problem does this PR solve?

Gemini 2.5 Flash models use reasoning by default, and there is currently no way to disable this behaviour. This leads to very long response times (> 1 min). The default behaviour should be that reasoning is disabled and configurable. Issue #10474.

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
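For illustration, this is roughly what the override surface looks like from the caller's side; only the `thinking_budget` key comes from this PR, the other keys and values are assumptions:

```python
# Illustrative sketch only: example gen_conf / llm_setting dictionaries.
# Only thinking_budget is introduced by this PR; the other keys are assumptions.
gen_conf_default = {"temperature": 0.7, "max_tokens": 1024}      # no thinking_budget -> 0, reasoning disabled
gen_conf_manual = {"temperature": 0.7, "thinking_budget": 2048}  # manual reasoning budget (1-24576 tokens)
gen_conf_auto = {"temperature": 0.7, "thinking_budget": -1}      # -1: let the model decide
```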
```diff
@@ -1188,8 +1188,36 @@ class GoogleChat(Base):
                 del gen_conf[k]
         return gen_conf
 
+    def _get_thinking_config(self, gen_conf):
+        """Extract and create ThinkingConfig from gen_conf.
+
+        Default behavior for Vertex AI Generative Models: thinking_budget=0 (disabled)
+        unless explicitly specified by the user. This does not apply to Claude models.
+
+        Users can override by setting thinking_budget in gen_conf/llm_setting:
+        - 0: Disabled (default)
+        - 1-24576: Manual budget
+        - -1: Auto (model decides)
+        """
+        # Claude models don't support ThinkingConfig
+        if "claude" in self.model_name:
+            gen_conf.pop("thinking_budget", None)
+            return None
+
+        # For Vertex AI Generative Models, default to thinking disabled
+        thinking_budget = gen_conf.pop("thinking_budget", 0)
+
+        if thinking_budget is not None:
+            try:
+                import vertexai.generative_models as glm  # type: ignore
+                return glm.ThinkingConfig(thinking_budget=thinking_budget)
+            except Exception:
+                pass
+        return None
+
     def _chat(self, history, gen_conf={}, **kwargs):
         system = history[0]["content"] if history and history[0]["role"] == "system" else ""
+        thinking_config = self._get_thinking_config(gen_conf)
         gen_conf = self._clean_conf(gen_conf)
         if "claude" in self.model_name:
             response = self.client.messages.create(
```
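The helper added above reduces to a small budget-resolution rule. A minimal standalone restatement for illustration (hypothetical function name, no Vertex AI import), assuming the semantics documented in the docstring:

```python
# Standalone restatement of the rule in _get_thinking_config, for illustration only.
# resolve_thinking_budget is a hypothetical name, not part of the RAGFlow codebase.
def resolve_thinking_budget(gen_conf: dict, model_name: str):
    if "claude" in model_name:
        # Claude models don't support ThinkingConfig: drop the key, return nothing.
        gen_conf.pop("thinking_budget", None)
        return None
    # Vertex AI generative models: default to 0, i.e. reasoning disabled.
    return gen_conf.pop("thinking_budget", 0)


assert resolve_thinking_budget({}, "gemini-2.5-flash") == 0                          # default: disabled
assert resolve_thinking_budget({"thinking_budget": -1}, "gemini-2.5-flash") == -1    # auto
assert resolve_thinking_budget({"thinking_budget": 512}, "claude-3-sonnet") is None  # ignored for Claude
```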
```diff
@@ -1223,7 +1251,10 @@ class GoogleChat(Base):
                             }
                         ]
 
-            response = self.client.generate_content(hist, generation_config=gen_conf)
+            if thinking_config:
+                response = self.client.generate_content(hist, generation_config=gen_conf, thinking_config=thinking_config)
+            else:
+                response = self.client.generate_content(hist, generation_config=gen_conf)
             ans = response.text
             return ans, response.usage_metadata.total_token_count
 
@@ -1255,6 +1286,7 @@ class GoogleChat(Base):
         response = None
         total_tokens = 0
         self.client._system_instruction = system
+        thinking_config = self._get_thinking_config(gen_conf)
         if "max_tokens" in gen_conf:
             gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
             del gen_conf["max_tokens"]
@@ -1272,7 +1304,10 @@ class GoogleChat(Base):
                     ]
         ans = ""
         try:
-            response = self.client.generate_content(history, generation_config=gen_conf, stream=True)
+            if thinking_config:
+                response = self.client.generate_content(history, generation_config=gen_conf, thinking_config=thinking_config, stream=True)
+            else:
+                response = self.client.generate_content(history, generation_config=gen_conf, stream=True)
             for resp in response:
                 ans = resp.text
                 total_tokens += num_tokens_from_string(ans)
```