Refa: automatic LLMs registration (#8651)

### What problem does this PR solve?

Support automatic registration of LLMs.

### Type of change

- [x] Refactoring
2026-02-02 16:45:08 +08:00 · 2025-07-03 19:05:31 +08:00
parent 3234a15aae
commit f8a6987f1e
7 changed files with 619 additions and 876 deletions
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@ -142,11 +142,7 @@ class Base(ABC):
            return f"{ERROR_PREFIX}: {error_code} - {str(e)}"

    def _verbose_tool_use(self, name, args, res):
-        return "<tool_call>" + json.dumps({
-            "name": name,
-            "args": args,
-            "result": res
-        }, ensure_ascii=False, indent=2) + "</tool_call>"
+        return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"

    def _append_history(self, hist, tool_call, tool_res):
        hist.append(
@ -191,10 +187,10 @@ class Base(ABC):
        tk_count = 0
        hist = deepcopy(history)
        # Implement exponential backoff retry strategy
-        for attempt in range(self.max_retries+1):
+        for attempt in range(self.max_retries + 1):
            history = hist
            try:
-                for _ in range(self.max_rounds*2):
+                for _ in range(self.max_rounds * 2):
                    response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, **gen_conf)
                    tk_count += self.total_token_count(response)
                    if any([not response.choices, not response.choices[0].message]):
@ -269,7 +265,7 @@ class Base(ABC):
        for attempt in range(self.max_retries + 1):
            history = hist
            try:
-                for _ in range(self.max_rounds*2):
+                for _ in range(self.max_rounds * 2):
                    reasoning_start = False
                    response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, **gen_conf)
                    final_tool_calls = {}
@ -430,6 +426,8 @@ class Base(ABC):


 class GptTurbo(Base):
+    _FACTORY_NAME = "OpenAI"
+
    def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.openai.com/v1"
@ -437,6 +435,8 @@ class GptTurbo(Base):


 class MoonshotChat(Base):
+    _FACTORY_NAME = "Moonshot"
+
    def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1", **kwargs):
        if not base_url:
            base_url = "https://api.moonshot.cn/v1"
@ -444,6 +444,8 @@ class MoonshotChat(Base):


 class XinferenceChat(Base):
+    _FACTORY_NAME = "Xinference"
+
    def __init__(self, key=None, model_name="", base_url="", **kwargs):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
@ -452,6 +454,8 @@ class XinferenceChat(Base):


 class HuggingFaceChat(Base):
+    _FACTORY_NAME = "HuggingFace"
+
    def __init__(self, key=None, model_name="", base_url="", **kwargs):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
@ -460,6 +464,8 @@ class HuggingFaceChat(Base):


 class ModelScopeChat(Base):
+    _FACTORY_NAME = "ModelScope"
+
    def __init__(self, key=None, model_name="", base_url="", **kwargs):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
@ -468,6 +474,8 @@ class ModelScopeChat(Base):


 class DeepSeekChat(Base):
+    _FACTORY_NAME = "DeepSeek"
+
    def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.deepseek.com/v1"
@ -475,6 +483,8 @@ class DeepSeekChat(Base):


 class AzureChat(Base):
+    _FACTORY_NAME = "Azure-OpenAI"
+
    def __init__(self, key, model_name, base_url, **kwargs):
        api_key = json.loads(key).get("api_key", "")
        api_version = json.loads(key).get("api_version", "2024-02-01")
@ -484,6 +494,8 @@ class AzureChat(Base):


 class BaiChuanChat(Base):
+    _FACTORY_NAME = "BaiChuan"
+
    def __init__(self, key, model_name="Baichuan3-Turbo", base_url="https://api.baichuan-ai.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.baichuan-ai.com/v1"
@ -557,6 +569,8 @@ class BaiChuanChat(Base):


 class QWenChat(Base):
+    _FACTORY_NAME = "Tongyi-Qianwen"
+
    def __init__(self, key, model_name=Generation.Models.qwen_turbo, base_url=None, **kwargs):
        if not base_url:
            base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
@ -565,6 +579,8 @@ class QWenChat(Base):


 class ZhipuChat(Base):
+    _FACTORY_NAME = "ZHIPU-AI"
+
    def __init__(self, key, model_name="glm-3-turbo", base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -630,6 +646,8 @@ class ZhipuChat(Base):


 class OllamaChat(Base):
+    _FACTORY_NAME = "Ollama"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -694,6 +712,8 @@ class OllamaChat(Base):


 class LocalAIChat(Base):
+    _FACTORY_NAME = "LocalAI"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -752,6 +772,8 @@ class LocalLLM(Base):


 class VolcEngineChat(Base):
+    _FACTORY_NAME = "VolcEngine"
+
    def __init__(self, key, model_name, base_url="https://ark.cn-beijing.volces.com/api/v3", **kwargs):
        """
        Since do not want to modify the original database fields, and the VolcEngine authentication method is quite special,
@ -765,6 +787,8 @@ class VolcEngineChat(Base):


 class MiniMaxChat(Base):
+    _FACTORY_NAME = "MiniMax"
+
    def __init__(self, key, model_name, base_url="https://api.minimax.chat/v1/text/chatcompletion_v2", **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -843,6 +867,8 @@ class MiniMaxChat(Base):


 class MistralChat(Base):
+    _FACTORY_NAME = "Mistral"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -896,6 +922,8 @@ class MistralChat(Base):


 class BedrockChat(Base):
+    _FACTORY_NAME = "Bedrock"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -978,6 +1006,8 @@ class BedrockChat(Base):


 class GeminiChat(Base):
+    _FACTORY_NAME = "Gemini"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -997,6 +1027,7 @@ class GeminiChat(Base):

    def _chat(self, history, gen_conf):
        from google.generativeai.types import content_types
+
        system = history[0]["content"] if history and history[0]["role"] == "system" else ""
        hist = []
        for item in history:
@ -1019,6 +1050,7 @@ class GeminiChat(Base):

    def chat_streamly(self, system, history, gen_conf):
        from google.generativeai.types import content_types
+
        gen_conf = self._clean_conf(gen_conf)
        if system:
            self.model._system_instruction = content_types.to_content(system)
@ -1042,6 +1074,8 @@ class GeminiChat(Base):


 class GroqChat(Base):
+    _FACTORY_NAME = "Groq"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1086,6 +1120,8 @@ class GroqChat(Base):

 ## openrouter
 class OpenRouterChat(Base):
+    _FACTORY_NAME = "OpenRouter"
+
    def __init__(self, key, model_name, base_url="https://openrouter.ai/api/v1", **kwargs):
        if not base_url:
            base_url = "https://openrouter.ai/api/v1"
@ -1093,6 +1129,8 @@ class OpenRouterChat(Base):


 class StepFunChat(Base):
+    _FACTORY_NAME = "StepFun"
+
    def __init__(self, key, model_name, base_url="https://api.stepfun.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.stepfun.com/v1"
@ -1100,6 +1138,8 @@ class StepFunChat(Base):


 class NvidiaChat(Base):
+    _FACTORY_NAME = "NVIDIA"
+
    def __init__(self, key, model_name, base_url="https://integrate.api.nvidia.com/v1", **kwargs):
        if not base_url:
            base_url = "https://integrate.api.nvidia.com/v1"
@ -1107,6 +1147,8 @@ class NvidiaChat(Base):


 class LmStudioChat(Base):
+    _FACTORY_NAME = "LM-Studio"
+
    def __init__(self, key, model_name, base_url, **kwargs):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
@ -1117,6 +1159,8 @@ class LmStudioChat(Base):


 class OpenAI_APIChat(Base):
+    _FACTORY_NAME = ["VLLM", "OpenAI-API-Compatible"]
+
    def __init__(self, key, model_name, base_url):
        if not base_url:
            raise ValueError("url cannot be None")
@ -1125,6 +1169,8 @@ class OpenAI_APIChat(Base):


 class PPIOChat(Base):
+    _FACTORY_NAME = "PPIO"
+
    def __init__(self, key, model_name, base_url="https://api.ppinfra.com/v3/openai", **kwargs):
        if not base_url:
            base_url = "https://api.ppinfra.com/v3/openai"
@ -1132,6 +1178,8 @@ class PPIOChat(Base):


 class CoHereChat(Base):
+    _FACTORY_NAME = "Cohere"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1207,6 +1255,8 @@ class CoHereChat(Base):


 class LeptonAIChat(Base):
+    _FACTORY_NAME = "LeptonAI"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        if not base_url:
            base_url = urljoin("https://" + model_name + ".lepton.run", "api/v1")
@ -1214,6 +1264,8 @@ class LeptonAIChat(Base):


 class TogetherAIChat(Base):
+    _FACTORY_NAME = "TogetherAI"
+
    def __init__(self, key, model_name, base_url="https://api.together.xyz/v1", **kwargs):
        if not base_url:
            base_url = "https://api.together.xyz/v1"
@ -1221,6 +1273,8 @@ class TogetherAIChat(Base):


 class PerfXCloudChat(Base):
+    _FACTORY_NAME = "PerfXCloud"
+
    def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1", **kwargs):
        if not base_url:
            base_url = "https://cloud.perfxlab.cn/v1"
@ -1228,6 +1282,8 @@ class PerfXCloudChat(Base):


 class UpstageChat(Base):
+    _FACTORY_NAME = "Upstage"
+
    def __init__(self, key, model_name, base_url="https://api.upstage.ai/v1/solar", **kwargs):
        if not base_url:
            base_url = "https://api.upstage.ai/v1/solar"
@ -1235,6 +1291,8 @@ class UpstageChat(Base):


 class NovitaAIChat(Base):
+    _FACTORY_NAME = "NovitaAI"
+
    def __init__(self, key, model_name, base_url="https://api.novita.ai/v3/openai", **kwargs):
        if not base_url:
            base_url = "https://api.novita.ai/v3/openai"
@ -1242,6 +1300,8 @@ class NovitaAIChat(Base):


 class SILICONFLOWChat(Base):
+    _FACTORY_NAME = "SILICONFLOW"
+
    def __init__(self, key, model_name, base_url="https://api.siliconflow.cn/v1", **kwargs):
        if not base_url:
            base_url = "https://api.siliconflow.cn/v1"
@ -1249,6 +1309,8 @@ class SILICONFLOWChat(Base):


 class YiChat(Base):
+    _FACTORY_NAME = "01.AI"
+
    def __init__(self, key, model_name, base_url="https://api.lingyiwanwu.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.lingyiwanwu.com/v1"
@ -1256,6 +1318,8 @@ class YiChat(Base):


 class GiteeChat(Base):
+    _FACTORY_NAME = "GiteeAI"
+
    def __init__(self, key, model_name, base_url="https://ai.gitee.com/v1/", **kwargs):
        if not base_url:
            base_url = "https://ai.gitee.com/v1/"
@ -1263,6 +1327,8 @@ class GiteeChat(Base):


 class ReplicateChat(Base):
+    _FACTORY_NAME = "Replicate"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1302,6 +1368,8 @@ class ReplicateChat(Base):


 class HunyuanChat(Base):
+    _FACTORY_NAME = "Tencent Hunyuan"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1378,6 +1446,8 @@ class HunyuanChat(Base):


 class SparkChat(Base):
+    _FACTORY_NAME = "XunFei Spark"
+
    def __init__(self, key, model_name, base_url="https://spark-api-open.xf-yun.com/v1", **kwargs):
        if not base_url:
            base_url = "https://spark-api-open.xf-yun.com/v1"
@ -1398,6 +1468,8 @@ class SparkChat(Base):


 class BaiduYiyanChat(Base):
+    _FACTORY_NAME = "BaiduYiyan"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1444,6 +1516,8 @@ class BaiduYiyanChat(Base):


 class AnthropicChat(Base):
+    _FACTORY_NAME = "Anthropic"
+
    def __init__(self, key, model_name, base_url="https://api.anthropic.com/v1/", **kwargs):
        if not base_url:
            base_url = "https://api.anthropic.com/v1/"
@ -1451,6 +1525,8 @@ class AnthropicChat(Base):


 class GoogleChat(Base):
+    _FACTORY_NAME = "Google Cloud"
+
    def __init__(self, key, model_name, base_url=None, **kwargs):
        super().__init__(key, model_name, base_url=base_url, **kwargs)

@ -1529,9 +1605,11 @@ class GoogleChat(Base):
            if "role" in item and item["role"] == "assistant":
                item["role"] = "model"
            if "content" in item:
-                item["parts"] = [{
-                    "text": item.pop("content"),
-                }]
+                item["parts"] = [
+                    {
+                        "text": item.pop("content"),
+                    }
+                ]

        response = self.client.generate_content(hist, generation_config=gen_conf)
        ans = response.text
@ -1587,8 +1665,10 @@ class GoogleChat(Base):


 class GPUStackChat(Base):
+    _FACTORY_NAME = "GPUStack"
+
    def __init__(self, key=None, model_name="", base_url="", **kwargs):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
        base_url = urljoin(base_url, "v1")
-        super().__init__(key, model_name, base_url, **kwargs)
+        super().__init__(key, model_name, base_url, **kwargs)