From b57c82b122d52eeb9300a370031f11350dee5e3d Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Wed, 28 Jan 2026 12:41:20 +0800
Subject: [PATCH] Feat: add kimi-k2.5 (#12852)

### What problem does this PR solve?

Add `kimi-k2.5` to `conf/llm_factories.json` and give it dedicated generation-config handling (thinking mode enabled by default, pinned sampling parameters) in `LiteLLMBase._clean_conf`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 conf/llm_factories.json |  9 ++++++++-
 rag/llm/chat_model.py   | 21 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 5439f368b..b31942688 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -994,6 +994,13 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "kimi-k2.5",
+                    "tags": "LLM,CHAT,256k",
+                    "max_tokens": 256000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "kimi-latest",
                     "tags": "LLM,CHAT,8k,32k,128k",
@@ -5578,4 +5585,4 @@
             ]
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 0e9fea1f3..2eb8ec4fa 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1180,13 +1180,30 @@ class LiteLLMBase(ABC):
         return LLMErrorCode.ERROR_GENERIC
 
     def _clean_conf(self, gen_conf):
+        gen_conf = deepcopy(gen_conf) if gen_conf else {}
+
         if self.provider == SupportedLiteLLMProvider.HunYuan:
             unsupported = ["presence_penalty", "frequency_penalty"]
             for key in unsupported:
                 gen_conf.pop(key, None)
 
-        if "max_tokens" in gen_conf:
-            del gen_conf["max_tokens"]
+        elif "kimi-k2.5" in self.model_name.lower():
+            reasoning = gen_conf.pop("reasoning", None)  # callers do not pass "reasoning" yet; handled here for future use
+            thinking = {"type": "enabled"}  # enable thinking by default
+            if reasoning is not None:
+                thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
+            elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}:
+                thinking = {"type": "disabled"}
+            gen_conf["thinking"] = thinking
+
+            thinking_enabled = thinking.get("type") == "enabled"
+            gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
+            gen_conf["top_p"] = 0.95
+            gen_conf["n"] = 1
+            gen_conf["presence_penalty"] = 0.0
+            gen_conf["frequency_penalty"] = 0.0
+
+        gen_conf.pop("max_tokens", None)
         return gen_conf
 
     async def async_chat(self, system, history, gen_conf, **kwargs):
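
For reviewers, a minimal standalone sketch of what the new kimi-k2.5 branch does to a generation config. The helper name `clean_kimi_k25_conf` and the example configs below are illustrative only (they are not part of the patch), and the defensive normalization `elif` from the patch is omitted for brevity; in the PR the logic lives inside `LiteLLMBase._clean_conf` and is gated on `self.model_name`.

```python
# Illustrative sketch of the kimi-k2.5 config cleaning added in this PR.
# The function name and example configs are made up for demonstration.
from copy import deepcopy


def clean_kimi_k25_conf(gen_conf):
    gen_conf = deepcopy(gen_conf) if gen_conf else {}

    # Map an optional boolean "reasoning" flag onto the "thinking" parameter;
    # thinking stays enabled when the caller does not say otherwise.
    reasoning = gen_conf.pop("reasoning", None)
    thinking = {"type": "enabled"}
    if reasoning is not None:
        thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
    gen_conf["thinking"] = thinking

    # Pinned sampling parameters; temperature depends on whether thinking is enabled.
    thinking_enabled = thinking["type"] == "enabled"
    gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
    gen_conf["top_p"] = 0.95
    gen_conf["n"] = 1
    gen_conf["presence_penalty"] = 0.0
    gen_conf["frequency_penalty"] = 0.0

    # As in the rest of _clean_conf, max_tokens is never forwarded.
    gen_conf.pop("max_tokens", None)
    return gen_conf


if __name__ == "__main__":
    # Thinking enabled by default -> temperature forced to 1.0, max_tokens dropped.
    print(clean_kimi_k25_conf({"temperature": 0.3, "max_tokens": 4096}))
    # Explicitly disabling reasoning -> thinking disabled, temperature forced to 0.6.
    print(clean_kimi_k25_conf({"reasoning": False}))
```

As in the patch itself, `max_tokens` is always stripped before the request is handed to LiteLLM, regardless of which provider branch ran.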