Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-01-31 15:45:08 +08:00)
Feat: add kimi-k2.5 (#12852)
### What problem does this PR solve?

Add kimi-k2.5.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
Model list JSON (file name not shown in this view), two hunks:

@@ -994,6 +994,13 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "kimi-k2.5",
+                    "tags": "LLM,CHAT,256k",
+                    "max_tokens": 256000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "kimi-latest",
                     "tags": "LLM,CHAT,8k,32k,128k",

@@ -5578,4 +5585,4 @@
             ]
         }
     ]
 }
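For reference, a minimal sketch (not part of this commit) of a shape check on an entry like the one added above. The dict literal repeats the new kimi-k2.5 record from the diff; the `check_chat_entry` helper and its rules are illustrative assumptions, not ragflow code.

```python
# Illustrative only: the keys come from the entry added in the diff above;
# the validation rules are assumptions made for this sketch.
REQUIRED_KEYS = {"llm_name", "tags", "max_tokens", "model_type", "is_tools"}

def check_chat_entry(entry: dict) -> None:
    """Basic shape check for a chat-model entry such as the new kimi-k2.5 record."""
    missing = REQUIRED_KEYS - entry.keys()
    if missing:
        raise ValueError(f"{entry.get('llm_name')} is missing keys: {missing}")
    if entry["model_type"] == "chat" and entry["max_tokens"] <= 0:
        raise ValueError("chat models need a positive max_tokens")

check_chat_entry({
    "llm_name": "kimi-k2.5",
    "tags": "LLM,CHAT,256k",
    "max_tokens": 256000,   # agrees with the 256k tag
    "model_type": "chat",
    "is_tools": True,       # JSON true: the model supports tool calling
})
print("kimi-k2.5 entry looks well-formed")
```

The `256k` tag and `max_tokens: 256000` both describe the model's context window, and `is_tools: true` marks the model as tool-calling capable.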
Python change in LiteLLMBase._clean_conf:

@@ -1180,13 +1180,30 @@ class LiteLLMBase(ABC):
         return LLMErrorCode.ERROR_GENERIC

     def _clean_conf(self, gen_conf):
+        gen_conf = deepcopy(gen_conf) if gen_conf else {}
+
         if self.provider == SupportedLiteLLMProvider.HunYuan:
             unsupported = ["presence_penalty", "frequency_penalty"]
             for key in unsupported:
                 gen_conf.pop(key, None)

-        if "max_tokens" in gen_conf:
-            del gen_conf["max_tokens"]
+        elif "kimi-k2.5" in self.model_name.lower():
+            reasoning = gen_conf.pop("reasoning", None)  # will never get one here, handle this later
+            thinking = {"type": "enabled"}  # enable thinking by default
+            if reasoning is not None:
+                thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
+            elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}:
+                thinking = {"type": "disabled"}
+            gen_conf["thinking"] = thinking
+
+            thinking_enabled = thinking.get("type") == "enabled"
+            gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
+            gen_conf["top_p"] = 0.95
+            gen_conf["n"] = 1
+            gen_conf["presence_penalty"] = 0.0
+            gen_conf["frequency_penalty"] = 0.0
+
+        gen_conf.pop("max_tokens", None)
         return gen_conf

     async def async_chat(self, system, history, gen_conf, **kwargs):
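To show what the new branch produces at run time, here is a minimal standalone sketch, under two assumptions: it transplants the kimi-k2.5 logic above into a free function with a made-up name, and it drops the `elif not isinstance(thinking, dict) ...` arm, which cannot fire as written because `thinking` is always one of the two literal dicts at that point.

```python
from copy import deepcopy

def clean_conf_for_kimi_k25(gen_conf: dict | None) -> dict:
    """Sketch of the kimi-k2.5 branch of _clean_conf, rewritten as a free function."""
    gen_conf = deepcopy(gen_conf) if gen_conf else {}

    # A caller-supplied "reasoning" flag toggles thinking; otherwise it stays enabled.
    reasoning = gen_conf.pop("reasoning", None)
    thinking = {"type": "enabled"}
    if reasoning is not None:
        thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
    gen_conf["thinking"] = thinking

    # Sampling parameters are pinned; temperature depends on whether thinking is on.
    thinking_enabled = thinking["type"] == "enabled"
    gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
    gen_conf["top_p"] = 0.95
    gen_conf["n"] = 1
    gen_conf["presence_penalty"] = 0.0
    gen_conf["frequency_penalty"] = 0.0

    # max_tokens is stripped unconditionally before the request is sent.
    gen_conf.pop("max_tokens", None)
    return gen_conf

# Thinking explicitly disabled: max_tokens is removed and temperature drops to 0.6.
print(clean_conf_for_kimi_k25({"reasoning": False, "max_tokens": 4096}))
# {'thinking': {'type': 'disabled'}, 'temperature': 0.6, 'top_p': 0.95, 'n': 1,
#  'presence_penalty': 0.0, 'frequency_penalty': 0.0}
```

Note that the diff also changes `max_tokens` handling for every provider: the old conditional `del` becomes an unconditional `gen_conf.pop("max_tokens", None)`, which the sketch mirrors.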