From b57c82b122d52eeb9300a370031f11350dee5e3d Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Wed, 28 Jan 2026 12:41:20 +0800
Subject: [PATCH] Feat: add kimi-k2.5 (#12852)

### What problem does this PR solve?

Add `kimi-k2.5` to `conf/llm_factories.json` and give it dedicated generation-config handling (thinking mode enabled by default, pinned sampling parameters) in `LiteLLMBase._clean_conf`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 conf/llm_factories.json |  9 ++++++++-
 rag/llm/chat_model.py   | 21 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 5439f368b..b31942688 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -994,6 +994,13 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "kimi-k2.5",
+                    "tags": "LLM,CHAT,256k",
+                    "max_tokens": 256000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "kimi-latest",
                     "tags": "LLM,CHAT,8k,32k,128k",
@@ -5578,4 +5585,4 @@
             ]
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 0e9fea1f3..2eb8ec4fa 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1180,13 +1180,30 @@ class LiteLLMBase(ABC):
         return LLMErrorCode.ERROR_GENERIC
 
     def _clean_conf(self, gen_conf):
+        gen_conf = deepcopy(gen_conf) if gen_conf else {}
+
         if self.provider == SupportedLiteLLMProvider.HunYuan:
             unsupported = ["presence_penalty", "frequency_penalty"]
             for key in unsupported:
                 gen_conf.pop(key, None)
 
-        if "max_tokens" in gen_conf:
-            del gen_conf["max_tokens"]
+        elif "kimi-k2.5" in self.model_name.lower():
+            reasoning = gen_conf.pop("reasoning", None)  # callers do not pass "reasoning" yet; handled here for future use
+            thinking = {"type": "enabled"}  # enable thinking by default
+            if reasoning is not None:
+                thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
+            elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}:
+                thinking = {"type": "disabled"}
+            gen_conf["thinking"] = thinking
+
+            thinking_enabled = thinking.get("type") == "enabled"
+            gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
+            gen_conf["top_p"] = 0.95
+            gen_conf["n"] = 1
+            gen_conf["presence_penalty"] = 0.0
+            gen_conf["frequency_penalty"] = 0.0
+
+        gen_conf.pop("max_tokens", None)
         return gen_conf
 
     async def async_chat(self, system, history, gen_conf, **kwargs):
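
For reviewers, a minimal standalone sketch of what the new kimi-k2.5 branch does to a generation config. The helper name `clean_kimi_k25_conf` and the example configs below are illustrative only (they are not part of the patch), and the defensive normalization `elif` from the patch is omitted for brevity; in the PR the logic lives inside `LiteLLMBase._clean_conf` and is gated on `self.model_name`.

```python
# Illustrative sketch of the kimi-k2.5 config cleaning added in this PR.
# The function name and example configs are made up for demonstration.
from copy import deepcopy


def clean_kimi_k25_conf(gen_conf):
    gen_conf = deepcopy(gen_conf) if gen_conf else {}

    # Map an optional boolean "reasoning" flag onto the "thinking" parameter;
    # thinking stays enabled when the caller does not say otherwise.
    reasoning = gen_conf.pop("reasoning", None)
    thinking = {"type": "enabled"}
    if reasoning is not None:
        thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
    gen_conf["thinking"] = thinking

    # Pinned sampling parameters; temperature depends on whether thinking is enabled.
    thinking_enabled = thinking["type"] == "enabled"
    gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
    gen_conf["top_p"] = 0.95
    gen_conf["n"] = 1
    gen_conf["presence_penalty"] = 0.0
    gen_conf["frequency_penalty"] = 0.0

    # As in the rest of _clean_conf, max_tokens is never forwarded.
    gen_conf.pop("max_tokens", None)
    return gen_conf


if __name__ == "__main__":
    # Thinking enabled by default -> temperature forced to 1.0, max_tokens dropped.
    print(clean_kimi_k25_conf({"temperature": 0.3, "max_tokens": 4096}))
    # Explicitly disabling reasoning -> thinking disabled, temperature forced to 0.6.
    print(clean_kimi_k25_conf({"reasoning": False}))
```

As in the patch itself, `max_tokens` is always stripped before the request is handed to LiteLLM, regardless of which provider branch ran.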