From 7ebc1f0943b307d522fffe9dd8739e8d028f2ea1 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Wed, 23 Jul 2025 18:10:35 +0800 Subject: [PATCH] Feat: add model provider DeepInfra (#9003) ### What problem does this PR solve? Add model provider DeepInfra. This model list comes from our community. NOTE: most endpoints haven't been tested, but they should work as OpenAI does. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- conf/llm_factories.json | 293 +++++++++++++++++++++++++++ docs/references/supported_models.mdx | 1 + rag/llm/chat_model.py | 9 + rag/llm/embedding_model.py | 12 +- rag/llm/sequence2txt_model.py | 10 + rag/llm/tts_model.py | 9 + web/src/assets/svg/llm/deepinfra.svg | 1 + web/src/constants/llm.ts | 2 + 8 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 web/src/assets/svg/llm/deepinfra.svg diff --git a/conf/llm_factories.json b/conf/llm_factories.json index aa24de6ac..95f7720d0 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -3830,6 +3830,299 @@ "tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,TEXT RE-RANK", "status": "1", "llm": [] + }, + { + "name": "DeepInfra", + "logo": "", + "tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,MODERATION", + "status": "1", + "llm": [ + { + "llm_name": "moonshotai/Kimi-K2-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Voxtral-Small-24B-2507", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "mistralai/Voxtral-Mini-3B-2507", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-0528-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-235B-A22B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-30B-A3B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen3-32B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": 
"Qwen/Qwen3-14B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3-0324-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-0528", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3-0324", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Devstral-Small-2507", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-Guard-4-12B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "Qwen/QwQ-32B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "anthropic/claude-4-opus", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "anthropic/claude-4-sonnet", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemini-2.5-flash", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemini-2.5-pro", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-27b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-12b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "google/gemma-3-4b-it", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "hexgrad/Kokoro-82M", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": "canopylabs/orpheus-3b-0.1-ft", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": 
"sesame/csm-1b", + "tags": "TTS", + "model_type": "tts" + }, + { + "llm_name": "microsoft/Phi-4-multimodal-instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V3", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Llama-3.3-70B-Instruct", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "microsoft/phi-4", + "tags": "LLM,CHAT", + "model_type": "chat" + }, + { + "llm_name": "openai/whisper-large-v3-turbo", + "tags": "SPEECH2TEXT", + "model_type": "speech2text" + }, + { + "llm_name": "BAAI/bge-base-en-v1.5", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-en-icl", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-large-en-v1.5", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-m3", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-m3-multi", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-0.6B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-4B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "Qwen/Qwen3-Embedding-8B", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/e5-base-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/e5-large-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/multilingual-e5-large", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "intfloat/multilingual-e5-large-instruct", + "tags": 
"TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-MiniLM-L12-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-MiniLM-L6-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/all-mpnet-base-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/clip-ViT-B-32", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/clip-ViT-B-32-multilingual-v1", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "sentence-transformers/paraphrase-MiniLM-L6-v2", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "shibing624/text2vec-base-chinese", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "thenlper/gte-base", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + }, + { + "llm_name": "thenlper/gte-large", + "tags": "TEXT EMBEDDING", + "model_type": "embedding" + } + ] } ] } diff --git a/docs/references/supported_models.mdx b/docs/references/supported_models.mdx index 897dcff21..472e5fbe4 100644 --- a/docs/references/supported_models.mdx +++ b/docs/references/supported_models.mdx @@ -62,6 +62,7 @@ A complete list of models supported by RAGFlow, which will continue to expand. 
class DeepInfraChat(Base):
    """Chat model served through DeepInfra's OpenAI-compatible endpoint.

    DeepInfra mirrors the OpenAI chat-completions API, so this class only
    pins the provider's default base URL and defers all request handling
    to ``Base``.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Callers may pass an explicit None/"" base_url; fall back to the
        # public DeepInfra endpoint in that case as well.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url, **kwargs)


class DeepInfraEmbed(OpenAIEmbed):
    """Text-embedding model served through DeepInfra's OpenAI-compatible endpoint.

    Inherits the full OpenAI embedding behavior from ``OpenAIEmbed``;
    only the default base URL differs.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai"):
        # Guard against a falsy base_url passed explicitly by the caller.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url)
class DeepInfraSeq2txt(Base):
    """Speech-to-text model served through DeepInfra's OpenAI-compatible endpoint.

    Builds an ``OpenAI`` client pointed at DeepInfra; transcription calls
    are inherited from ``Base``.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Fall back to the public endpoint when base_url is passed as
        # None or an empty string.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name


class DeepInfraTTS(OpenAITTS):
    """Text-to-speech model served through DeepInfra's OpenAI-compatible endpoint.

    Inherits the OpenAI TTS streaming behavior from ``OpenAITTS``; only
    the default base URL differs.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        # Guard against a falsy base_url passed explicitly by the caller.
        base_url = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, base_url, **kwargs)