mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add gpustack model provider (#4469)
### What problem does this PR solve? Add GPUStack as a new model provider. [GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU cluster manager for running LLMs. Currently, locally deployed models in GPUStack cannot integrate well with RAGFlow. GPUStack provides both OpenAI-compatible APIs (Models / Chat Completions / Embeddings / Speech2Text / TTS) and other APIs like Rerank. We would like to use GPUStack as a model provider in RAGFlow. [GPUStack Docs](https://docs.gpustack.ai/latest/quickstart/) Related issue: https://github.com/infiniflow/ragflow/issues/4064. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### Testing Instructions 1. Install GPUStack and deploy the `llama-3.2-1b-instruct` LLM, `bge-m3` text embedding model, `bge-reranker-v2-m3` rerank model, `faster-whisper-medium` Speech-to-Text model, and `cosyvoice-300m-sft` Text-to-Speech model in GPUStack. 2. Add the provider in RAGFlow settings. 3. Test in RAGFlow.
This commit is contained in:
@ -42,6 +42,7 @@ from .embedding_model import (
|
||||
VoyageEmbed,
|
||||
HuggingFaceEmbed,
|
||||
VolcEngineEmbed,
|
||||
GPUStackEmbed,
|
||||
)
|
||||
from .chat_model import (
|
||||
GptTurbo,
|
||||
@ -80,6 +81,7 @@ from .chat_model import (
|
||||
AnthropicChat,
|
||||
GoogleChat,
|
||||
HuggingFaceChat,
|
||||
GPUStackChat,
|
||||
)
|
||||
|
||||
from .cv_model import (
|
||||
@ -116,6 +118,7 @@ from .rerank_model import (
|
||||
BaiduYiyanRerank,
|
||||
VoyageRerank,
|
||||
QWenRerank,
|
||||
GPUStackRerank,
|
||||
)
|
||||
from .sequence2txt_model import (
|
||||
GPTSeq2txt,
|
||||
@ -123,6 +126,7 @@ from .sequence2txt_model import (
|
||||
AzureSeq2txt,
|
||||
XinferenceSeq2txt,
|
||||
TencentCloudSeq2txt,
|
||||
GPUStackSeq2txt,
|
||||
)
|
||||
from .tts_model import (
|
||||
FishAudioTTS,
|
||||
@ -130,6 +134,7 @@ from .tts_model import (
|
||||
OpenAITTS,
|
||||
SparkTTS,
|
||||
XinferenceTTS,
|
||||
GPUStackTTS,
|
||||
)
|
||||
|
||||
EmbeddingModel = {
|
||||
@ -161,6 +166,7 @@ EmbeddingModel = {
|
||||
"Voyage AI": VoyageEmbed,
|
||||
"HuggingFace": HuggingFaceEmbed,
|
||||
"VolcEngine": VolcEngineEmbed,
|
||||
"GPUStack": GPUStackEmbed,
|
||||
}
|
||||
|
||||
CvModel = {
|
||||
@ -220,6 +226,7 @@ ChatModel = {
|
||||
"Anthropic": AnthropicChat,
|
||||
"Google Cloud": GoogleChat,
|
||||
"HuggingFace": HuggingFaceChat,
|
||||
"GPUStack": GPUStackChat,
|
||||
}
|
||||
|
||||
RerankModel = {
|
||||
@ -237,6 +244,7 @@ RerankModel = {
|
||||
"BaiduYiyan": BaiduYiyanRerank,
|
||||
"Voyage AI": VoyageRerank,
|
||||
"Tongyi-Qianwen": QWenRerank,
|
||||
"GPUStack": GPUStackRerank,
|
||||
}
|
||||
|
||||
Seq2txtModel = {
|
||||
@ -245,6 +253,7 @@ Seq2txtModel = {
|
||||
"Azure-OpenAI": AzureSeq2txt,
|
||||
"Xinference": XinferenceSeq2txt,
|
||||
"Tencent Cloud": TencentCloudSeq2txt,
|
||||
"GPUStack": GPUStackSeq2txt,
|
||||
}
|
||||
|
||||
TTSModel = {
|
||||
@ -253,4 +262,5 @@ TTSModel = {
|
||||
"OpenAI": OpenAITTS,
|
||||
"XunFei Spark": SparkTTS,
|
||||
"Xinference": XinferenceTTS,
|
||||
"GPUStack": GPUStackTTS,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user