From 7ebc1f0943b307d522fffe9dd8739e8d028f2ea1 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Wed, 23 Jul 2025 18:10:35 +0800 Subject: [PATCH] Feat: add model provider DeepInfra (#9003) ### What problem does this PR solve? Add model provider DeepInfra. This model list comes from our community. NOTE: most endpoints haven't been tested, but they should work as OpenAI does. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- conf/llm_factories.json | 293 +++++++++++++++++++++++++++ docs/references/supported_models.mdx | 1 + rag/llm/chat_model.py | 9 + rag/llm/embedding_model.py | 12 +- rag/llm/sequence2txt_model.py | 10 + rag/llm/tts_model.py | 9 + web/src/assets/svg/llm/deepinfra.svg | 1 + web/src/constants/llm.ts | 2 + 8 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 web/src/assets/svg/llm/deepinfra.svg diff --git a/conf/llm_factories.json b/conf/llm_factories.json index aa24de6ac..95f7720d0 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -3830,6 +3830,299 @@ "tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,TEXT RE-RANK", "status": "1", "llm": [] + }, + { + "name": "DeepInfra", + "logo": "", + "tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,MODERATION", + "status": "1", + "llm": [ + { + "llm_name": "moonshotai/Kimi-K2-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Voxtral-Small-24B-2507", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "mistralai/Voxtral-Mini-3B-2507", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-0528-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-235B-A22B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-30B-A3B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-32B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": 
"Qwen/Qwen3-14B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3-0324-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-0528", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3-0324", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Devstral-Small-2507", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-Guard-4-12B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/QwQ-32B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "anthropic/claude-4-opus", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "anthropic/claude-4-sonnet", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemini-2.5-flash", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemini-2.5-pro", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-27b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-12b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-4b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "hexgrad/Kokoro-82M", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": "canopylabs/orpheus-3b-0.1-ft", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": 
"sesame/csm-1b", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": "microsoft/Phi-4-multimodal-instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-3.3-70B-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "microsoft/phi-4", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "openai/whisper-large-v3-turbo", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "BAAI/bge-base-en-v1.5", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-en-icl", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-large-en-v1.5", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-m3", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-m3-multi", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-0.6B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-4B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-8B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/e5-base-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/e5-large-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/multilingual-e5-large", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/multilingual-e5-large-instruct", + "tags": 
"TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-MiniLM-L12-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-MiniLM-L6-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-mpnet-base-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/clip-ViT-B-32", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/clip-ViT-B-32-multilingual-v1", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/paraphrase-MiniLM-L6-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "shibing624/text2vec-base-chinese", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "thenlper/gte-base", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "thenlper/gte-large", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + } + ] } ] } diff --git a/docs/references/supported_models.mdx b/docs/references/supported_models.mdx index 897dcff21..472e5fbe4 100644 --- a/docs/references/supported_models.mdx +++ b/docs/references/supported_models.mdx @@ -62,6 +62,7 @@ A complete list of models supported by RAGFlow, which will continue to expand. 
class DeepInfraChat(Base):
    """Chat model served through DeepInfra's OpenAI-compatible endpoint.

    DeepInfra mirrors the OpenAI chat-completions API, so this class only
    pins the provider's default base URL and defers all request handling
    to ``Base``.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Callers may pass an explicit None/"" base_url; fall back to the
        # public DeepInfra endpoint in that case as well.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url, **kwargs)


class DeepInfraEmbed(OpenAIEmbed):
    """Text-embedding model served through DeepInfra's OpenAI-compatible endpoint.

    Inherits the full OpenAI embedding behavior from ``OpenAIEmbed``;
    only the default base URL differs.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai"):
        # Guard against a falsy base_url passed explicitly by the caller.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url)
class DeepInfraSeq2txt(Base):
    """Speech-to-text model served through DeepInfra's OpenAI-compatible endpoint.

    Builds an ``OpenAI`` client pointed at DeepInfra; transcription calls
    are inherited from ``Base``.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Fall back to the public endpoint when base_url is passed as
        # None or an empty string.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name


class DeepInfraTTS(OpenAITTS):
    """Text-to-speech model served through DeepInfra's OpenAI-compatible endpoint.

    Inherits the OpenAI TTS streaming behavior from ``OpenAITTS``; only
    the default base URL differs.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Guard against a falsy base_url passed explicitly by the caller.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url, **kwargs)