mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add gpustack model provider (#4469)
### What problem does this PR solve? Add GPUStack as a new model provider. [GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU cluster manager for running LLMs. Currently, locally deployed models in GPUStack cannot integrate well with RAGFlow. GPUStack provides both OpenAI-compatible APIs (Models / Chat Completions / Embeddings / Speech2Text / TTS) and other APIs like Rerank. We would like to use GPUStack as a model provider in RAGFlow. [GPUStack Docs](https://docs.gpustack.ai/latest/quickstart/) Related issue: https://github.com/infiniflow/ragflow/issues/4064. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### Testing Instructions 1. Install GPUStack and deploy the `llama-3.2-1b-instruct` LLM, `bge-m3` text embedding model, `bge-reranker-v2-m3` rerank model, `faster-whisper-medium` Speech-to-Text model, and `cosyvoice-300m-sft` Text-to-Speech model in GPUStack. 2. Add the provider in RAGFlow settings. 3. Test in RAGFlow.
This commit is contained in:
@ -42,6 +42,7 @@ from .embedding_model import (
|
||||
VoyageEmbed,
|
||||
HuggingFaceEmbed,
|
||||
VolcEngineEmbed,
|
||||
GPUStackEmbed,
|
||||
)
|
||||
from .chat_model import (
|
||||
GptTurbo,
|
||||
@ -80,6 +81,7 @@ from .chat_model import (
|
||||
AnthropicChat,
|
||||
GoogleChat,
|
||||
HuggingFaceChat,
|
||||
GPUStackChat,
|
||||
)
|
||||
|
||||
from .cv_model import (
|
||||
@ -116,6 +118,7 @@ from .rerank_model import (
|
||||
BaiduYiyanRerank,
|
||||
VoyageRerank,
|
||||
QWenRerank,
|
||||
GPUStackRerank,
|
||||
)
|
||||
from .sequence2txt_model import (
|
||||
GPTSeq2txt,
|
||||
@ -123,6 +126,7 @@ from .sequence2txt_model import (
|
||||
AzureSeq2txt,
|
||||
XinferenceSeq2txt,
|
||||
TencentCloudSeq2txt,
|
||||
GPUStackSeq2txt,
|
||||
)
|
||||
from .tts_model import (
|
||||
FishAudioTTS,
|
||||
@ -130,6 +134,7 @@ from .tts_model import (
|
||||
OpenAITTS,
|
||||
SparkTTS,
|
||||
XinferenceTTS,
|
||||
GPUStackTTS,
|
||||
)
|
||||
|
||||
EmbeddingModel = {
|
||||
@ -161,6 +166,7 @@ EmbeddingModel = {
|
||||
"Voyage AI": VoyageEmbed,
|
||||
"HuggingFace": HuggingFaceEmbed,
|
||||
"VolcEngine": VolcEngineEmbed,
|
||||
"GPUStack": GPUStackEmbed,
|
||||
}
|
||||
|
||||
CvModel = {
|
||||
@ -220,6 +226,7 @@ ChatModel = {
|
||||
"Anthropic": AnthropicChat,
|
||||
"Google Cloud": GoogleChat,
|
||||
"HuggingFace": HuggingFaceChat,
|
||||
"GPUStack": GPUStackChat,
|
||||
}
|
||||
|
||||
RerankModel = {
|
||||
@ -237,6 +244,7 @@ RerankModel = {
|
||||
"BaiduYiyan": BaiduYiyanRerank,
|
||||
"Voyage AI": VoyageRerank,
|
||||
"Tongyi-Qianwen": QWenRerank,
|
||||
"GPUStack": GPUStackRerank,
|
||||
}
|
||||
|
||||
Seq2txtModel = {
|
||||
@ -245,6 +253,7 @@ Seq2txtModel = {
|
||||
"Azure-OpenAI": AzureSeq2txt,
|
||||
"Xinference": XinferenceSeq2txt,
|
||||
"Tencent Cloud": TencentCloudSeq2txt,
|
||||
"GPUStack": GPUStackSeq2txt,
|
||||
}
|
||||
|
||||
TTSModel = {
|
||||
@ -253,4 +262,5 @@ TTSModel = {
|
||||
"OpenAI": OpenAITTS,
|
||||
"XunFei Spark": SparkTTS,
|
||||
"Xinference": XinferenceTTS,
|
||||
"GPUStack": GPUStackTTS,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user