Feat: add gpustack model provider (#4469)

### What problem does this PR solve?

Add GPUStack as a new model provider.
[GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU
cluster manager for running LLMs. Currently, locally deployed models in
GPUStack cannot integrate well with RAGFlow. GPUStack provides both
OpenAI compatible APIs (Models / Chat Completions / Embeddings /
Speech2Text / TTS) and other APIs like Rerank. We would like to use
GPUStack as a model provider in RAGFlow.

[GPUStack Docs](https://docs.gpustack.ai/latest/quickstart/)

Related issue: https://github.com/infiniflow/ragflow/issues/4064.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)



### Testing Instructions
1. Install GPUStack and deploy the `llama-3.2-1b-instruct` LLM, the `bge-m3`
text embedding model, the `bge-reranker-v2-m3` rerank model, the
`faster-whisper-medium` Speech-to-Text model, and the `cosyvoice-300m-sft`
TTS model in GPUStack.
2. Add the GPUStack provider in the RAGFlow settings.
3. Test the deployed models in RAGFlow.
This commit is contained in:
Alex Chen
2025-01-15 14:15:58 +08:00
committed by GitHub
parent e478586a8e
commit 7944aacafa
12 changed files with 159 additions and 3 deletions

View File

@ -18,10 +18,12 @@ import threading
from urllib.parse import urljoin
import requests
import httpx
from huggingface_hub import snapshot_download
import os
from abc import ABC
import numpy as np
from yarl import URL
from api import settings
from api.utils.file_utils import get_home_cache_dir
@ -457,3 +459,53 @@ class QWenRerank(Base):
return rank, resp.usage.total_tokens
else:
raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")
class GPUStackRerank(Base):
    """Rerank client that posts to a GPUStack server's ``v1/rerank`` endpoint."""

    def __init__(
        self, key, model_name, base_url
    ):
        """Store the model name, the rerank endpoint URL and the request headers.

        Args:
            key: API key, sent as a ``Bearer`` token in the ``authorization``
                header.
            model_name: Sent as the ``model`` field of every rerank request.
            base_url: Server root URL; ``v1/rerank`` is appended to it.

        Raises:
            ValueError: If ``base_url`` is empty or ``None``.
        """
        if not base_url:
            raise ValueError("url cannot be None")

        self.model_name = model_name
        self.base_url = str(URL(base_url) / "v1" / "rerank")
        self.headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """Score every entry of ``texts`` against ``query``.

        Args:
            query: The query string sent as ``query``.
            texts: The candidate documents sent as ``documents``.

        Returns:
            A ``(rank, token_count)`` tuple. ``rank`` is a float NumPy array
            with one slot per input text; each returned result writes its
            ``relevance_score`` into the slot named by its ``index`` and the
            remaining slots stay ``0``. ``token_count`` is the sum of
            ``num_tokens_from_string`` over ``texts``, or ``0`` when the
            response carries no ``"results"`` key.

        Raises:
            ValueError: If the server answers with an HTTP error status.
        """
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            # Ask for a score for every document, not just the best few.
            "top_n": len(texts),
        }

        try:
            response = requests.post(
                self.base_url, json=payload, headers=self.headers
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # The call goes through ``requests``, so a bad status surfaces as
            # ``requests.exceptions.HTTPError``; an ``httpx`` handler would
            # never match and the error would escape unconverted.
            raise ValueError(
                f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}"
            ) from e

        response_json = response.json()

        rank = np.zeros(len(texts), dtype=float)
        if "results" not in response_json:
            return rank, 0

        token_count = sum(num_tokens_from_string(t) for t in texts)

        for result in response_json["results"]:
            rank[result["index"]] = result["relevance_score"]

        return (
            rank,
            token_count,
        )