Refa: automatic LLMs registration (#8651)

### What problem does this PR solve?

Support automatic registration of LLMs.

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-07-03 19:05:31 +08:00
committed by GitHub
parent 3234a15aae
commit f8a6987f1e
7 changed files with 619 additions and 876 deletions

View File

@ -142,11 +142,7 @@ class Base(ABC):
return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
def _verbose_tool_use(self, name, args, res):
return "<tool_call>" + json.dumps({
"name": name,
"args": args,
"result": res
}, ensure_ascii=False, indent=2) + "</tool_call>"
return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
def _append_history(self, hist, tool_call, tool_res):
hist.append(
@ -191,10 +187,10 @@ class Base(ABC):
tk_count = 0
hist = deepcopy(history)
# Implement exponential backoff retry strategy
for attempt in range(self.max_retries+1):
for attempt in range(self.max_retries + 1):
history = hist
try:
for _ in range(self.max_rounds*2):
for _ in range(self.max_rounds * 2):
response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, **gen_conf)
tk_count += self.total_token_count(response)
if any([not response.choices, not response.choices[0].message]):
@ -269,7 +265,7 @@ class Base(ABC):
for attempt in range(self.max_retries + 1):
history = hist
try:
for _ in range(self.max_rounds*2):
for _ in range(self.max_rounds * 2):
reasoning_start = False
response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, **gen_conf)
final_tool_calls = {}
@ -430,6 +426,8 @@ class Base(ABC):
class GptTurbo(Base):
_FACTORY_NAME = "OpenAI"
def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1", **kwargs):
if not base_url:
base_url = "https://api.openai.com/v1"
@ -437,6 +435,8 @@ class GptTurbo(Base):
class MoonshotChat(Base):
_FACTORY_NAME = "Moonshot"
def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1", **kwargs):
if not base_url:
base_url = "https://api.moonshot.cn/v1"
@ -444,6 +444,8 @@ class MoonshotChat(Base):
class XinferenceChat(Base):
_FACTORY_NAME = "Xinference"
def __init__(self, key=None, model_name="", base_url="", **kwargs):
if not base_url:
raise ValueError("Local llm url cannot be None")
@ -452,6 +454,8 @@ class XinferenceChat(Base):
class HuggingFaceChat(Base):
_FACTORY_NAME = "HuggingFace"
def __init__(self, key=None, model_name="", base_url="", **kwargs):
if not base_url:
raise ValueError("Local llm url cannot be None")
@ -460,6 +464,8 @@ class HuggingFaceChat(Base):
class ModelScopeChat(Base):
_FACTORY_NAME = "ModelScope"
def __init__(self, key=None, model_name="", base_url="", **kwargs):
if not base_url:
raise ValueError("Local llm url cannot be None")
@ -468,6 +474,8 @@ class ModelScopeChat(Base):
class DeepSeekChat(Base):
_FACTORY_NAME = "DeepSeek"
def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1", **kwargs):
if not base_url:
base_url = "https://api.deepseek.com/v1"
@ -475,6 +483,8 @@ class DeepSeekChat(Base):
class AzureChat(Base):
_FACTORY_NAME = "Azure-OpenAI"
def __init__(self, key, model_name, base_url, **kwargs):
api_key = json.loads(key).get("api_key", "")
api_version = json.loads(key).get("api_version", "2024-02-01")
@ -484,6 +494,8 @@ class AzureChat(Base):
class BaiChuanChat(Base):
_FACTORY_NAME = "BaiChuan"
def __init__(self, key, model_name="Baichuan3-Turbo", base_url="https://api.baichuan-ai.com/v1", **kwargs):
if not base_url:
base_url = "https://api.baichuan-ai.com/v1"
@ -557,6 +569,8 @@ class BaiChuanChat(Base):
class QWenChat(Base):
_FACTORY_NAME = "Tongyi-Qianwen"
def __init__(self, key, model_name=Generation.Models.qwen_turbo, base_url=None, **kwargs):
if not base_url:
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
@ -565,6 +579,8 @@ class QWenChat(Base):
class ZhipuChat(Base):
_FACTORY_NAME = "ZHIPU-AI"
def __init__(self, key, model_name="glm-3-turbo", base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -630,6 +646,8 @@ class ZhipuChat(Base):
class OllamaChat(Base):
_FACTORY_NAME = "Ollama"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -694,6 +712,8 @@ class OllamaChat(Base):
class LocalAIChat(Base):
_FACTORY_NAME = "LocalAI"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -752,6 +772,8 @@ class LocalLLM(Base):
class VolcEngineChat(Base):
_FACTORY_NAME = "VolcEngine"
def __init__(self, key, model_name, base_url="https://ark.cn-beijing.volces.com/api/v3", **kwargs):
"""
Since do not want to modify the original database fields, and the VolcEngine authentication method is quite special,
@ -765,6 +787,8 @@ class VolcEngineChat(Base):
class MiniMaxChat(Base):
_FACTORY_NAME = "MiniMax"
def __init__(self, key, model_name, base_url="https://api.minimax.chat/v1/text/chatcompletion_v2", **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -843,6 +867,8 @@ class MiniMaxChat(Base):
class MistralChat(Base):
_FACTORY_NAME = "Mistral"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -896,6 +922,8 @@ class MistralChat(Base):
class BedrockChat(Base):
_FACTORY_NAME = "Bedrock"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -978,6 +1006,8 @@ class BedrockChat(Base):
class GeminiChat(Base):
_FACTORY_NAME = "Gemini"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -997,6 +1027,7 @@ class GeminiChat(Base):
def _chat(self, history, gen_conf):
from google.generativeai.types import content_types
system = history[0]["content"] if history and history[0]["role"] == "system" else ""
hist = []
for item in history:
@ -1019,6 +1050,7 @@ class GeminiChat(Base):
def chat_streamly(self, system, history, gen_conf):
from google.generativeai.types import content_types
gen_conf = self._clean_conf(gen_conf)
if system:
self.model._system_instruction = content_types.to_content(system)
@ -1042,6 +1074,8 @@ class GeminiChat(Base):
class GroqChat(Base):
_FACTORY_NAME = "Groq"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1086,6 +1120,8 @@ class GroqChat(Base):
## openrouter
class OpenRouterChat(Base):
_FACTORY_NAME = "OpenRouter"
def __init__(self, key, model_name, base_url="https://openrouter.ai/api/v1", **kwargs):
if not base_url:
base_url = "https://openrouter.ai/api/v1"
@ -1093,6 +1129,8 @@ class OpenRouterChat(Base):
class StepFunChat(Base):
_FACTORY_NAME = "StepFun"
def __init__(self, key, model_name, base_url="https://api.stepfun.com/v1", **kwargs):
if not base_url:
base_url = "https://api.stepfun.com/v1"
@ -1100,6 +1138,8 @@ class StepFunChat(Base):
class NvidiaChat(Base):
_FACTORY_NAME = "NVIDIA"
def __init__(self, key, model_name, base_url="https://integrate.api.nvidia.com/v1", **kwargs):
if not base_url:
base_url = "https://integrate.api.nvidia.com/v1"
@ -1107,6 +1147,8 @@ class NvidiaChat(Base):
class LmStudioChat(Base):
_FACTORY_NAME = "LM-Studio"
def __init__(self, key, model_name, base_url, **kwargs):
if not base_url:
raise ValueError("Local llm url cannot be None")
@ -1117,6 +1159,8 @@ class LmStudioChat(Base):
class OpenAI_APIChat(Base):
_FACTORY_NAME = ["VLLM", "OpenAI-API-Compatible"]
def __init__(self, key, model_name, base_url):
if not base_url:
raise ValueError("url cannot be None")
@ -1125,6 +1169,8 @@ class OpenAI_APIChat(Base):
class PPIOChat(Base):
_FACTORY_NAME = "PPIO"
def __init__(self, key, model_name, base_url="https://api.ppinfra.com/v3/openai", **kwargs):
if not base_url:
base_url = "https://api.ppinfra.com/v3/openai"
@ -1132,6 +1178,8 @@ class PPIOChat(Base):
class CoHereChat(Base):
_FACTORY_NAME = "Cohere"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1207,6 +1255,8 @@ class CoHereChat(Base):
class LeptonAIChat(Base):
_FACTORY_NAME = "LeptonAI"
def __init__(self, key, model_name, base_url=None, **kwargs):
if not base_url:
base_url = urljoin("https://" + model_name + ".lepton.run", "api/v1")
@ -1214,6 +1264,8 @@ class LeptonAIChat(Base):
class TogetherAIChat(Base):
_FACTORY_NAME = "TogetherAI"
def __init__(self, key, model_name, base_url="https://api.together.xyz/v1", **kwargs):
if not base_url:
base_url = "https://api.together.xyz/v1"
@ -1221,6 +1273,8 @@ class TogetherAIChat(Base):
class PerfXCloudChat(Base):
_FACTORY_NAME = "PerfXCloud"
def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1", **kwargs):
if not base_url:
base_url = "https://cloud.perfxlab.cn/v1"
@ -1228,6 +1282,8 @@ class PerfXCloudChat(Base):
class UpstageChat(Base):
_FACTORY_NAME = "Upstage"
def __init__(self, key, model_name, base_url="https://api.upstage.ai/v1/solar", **kwargs):
if not base_url:
base_url = "https://api.upstage.ai/v1/solar"
@ -1235,6 +1291,8 @@ class UpstageChat(Base):
class NovitaAIChat(Base):
_FACTORY_NAME = "NovitaAI"
def __init__(self, key, model_name, base_url="https://api.novita.ai/v3/openai", **kwargs):
if not base_url:
base_url = "https://api.novita.ai/v3/openai"
@ -1242,6 +1300,8 @@ class NovitaAIChat(Base):
class SILICONFLOWChat(Base):
_FACTORY_NAME = "SILICONFLOW"
def __init__(self, key, model_name, base_url="https://api.siliconflow.cn/v1", **kwargs):
if not base_url:
base_url = "https://api.siliconflow.cn/v1"
@ -1249,6 +1309,8 @@ class SILICONFLOWChat(Base):
class YiChat(Base):
_FACTORY_NAME = "01.AI"
def __init__(self, key, model_name, base_url="https://api.lingyiwanwu.com/v1", **kwargs):
if not base_url:
base_url = "https://api.lingyiwanwu.com/v1"
@ -1256,6 +1318,8 @@ class YiChat(Base):
class GiteeChat(Base):
_FACTORY_NAME = "GiteeAI"
def __init__(self, key, model_name, base_url="https://ai.gitee.com/v1/", **kwargs):
if not base_url:
base_url = "https://ai.gitee.com/v1/"
@ -1263,6 +1327,8 @@ class GiteeChat(Base):
class ReplicateChat(Base):
_FACTORY_NAME = "Replicate"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1302,6 +1368,8 @@ class ReplicateChat(Base):
class HunyuanChat(Base):
_FACTORY_NAME = "Tencent Hunyuan"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1378,6 +1446,8 @@ class HunyuanChat(Base):
class SparkChat(Base):
_FACTORY_NAME = "XunFei Spark"
def __init__(self, key, model_name, base_url="https://spark-api-open.xf-yun.com/v1", **kwargs):
if not base_url:
base_url = "https://spark-api-open.xf-yun.com/v1"
@ -1398,6 +1468,8 @@ class SparkChat(Base):
class BaiduYiyanChat(Base):
_FACTORY_NAME = "BaiduYiyan"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1444,6 +1516,8 @@ class BaiduYiyanChat(Base):
class AnthropicChat(Base):
_FACTORY_NAME = "Anthropic"
def __init__(self, key, model_name, base_url="https://api.anthropic.com/v1/", **kwargs):
if not base_url:
base_url = "https://api.anthropic.com/v1/"
@ -1451,6 +1525,8 @@ class AnthropicChat(Base):
class GoogleChat(Base):
_FACTORY_NAME = "Google Cloud"
def __init__(self, key, model_name, base_url=None, **kwargs):
super().__init__(key, model_name, base_url=base_url, **kwargs)
@ -1529,9 +1605,11 @@ class GoogleChat(Base):
if "role" in item and item["role"] == "assistant":
item["role"] = "model"
if "content" in item:
item["parts"] = [{
"text": item.pop("content"),
}]
item["parts"] = [
{
"text": item.pop("content"),
}
]
response = self.client.generate_content(hist, generation_config=gen_conf)
ans = response.text
@ -1587,8 +1665,10 @@ class GoogleChat(Base):
class GPUStackChat(Base):
_FACTORY_NAME = "GPUStack"
def __init__(self, key=None, model_name="", base_url="", **kwargs):
if not base_url:
raise ValueError("Local llm url cannot be None")
base_url = urljoin(base_url, "v1")
super().__init__(key, model_name, base_url, **kwargs)
super().__init__(key, model_name, base_url, **kwargs)