mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-02 16:45:08 +08:00
Feat: add gpustack model provider (#4469)
### What problem does this PR solve? Add GPUStack as a new model provider. [GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU cluster manager for running LLMs. Currently, locally deployed models in GPUStack cannot integrate well with RAGFlow. GPUStack provides both OpenAI-compatible APIs (Models / Chat Completions / Embeddings / Speech2Text / TTS) and other APIs such as Rerank. We would like to use GPUStack as a model provider in RAGFlow. [GPUStack Docs](https://docs.gpustack.ai/latest/quickstart/) Related issue: https://github.com/infiniflow/ragflow/issues/4064. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### Testing Instructions 1. Install GPUStack and deploy the `llama-3.2-1b-instruct` LLM, the `bge-m3` text embedding model, the `bge-reranker-v2-m3` rerank model, the `faster-whisper-medium` speech-to-text model, and the `cosyvoice-300m-sft` text-to-speech model in GPUStack. 2. Add the provider in the RAGFlow settings. 3. Test in RAGFlow.
This commit is contained in:
@ -18,10 +18,12 @@ import threading
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
from huggingface_hub import snapshot_download
|
||||
import os
|
||||
from abc import ABC
|
||||
import numpy as np
|
||||
from yarl import URL
|
||||
|
||||
from api import settings
|
||||
from api.utils.file_utils import get_home_cache_dir
|
||||
@ -457,3 +459,53 @@ class QWenRerank(Base):
|
||||
return rank, resp.usage.total_tokens
|
||||
else:
|
||||
raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")
|
||||
|
||||
class GPUStackRerank(Base):
    """Rerank client for a GPUStack server's OpenAI-style ``/v1/rerank`` endpoint."""

    def __init__(self, key, model_name, base_url):
        """
        Args:
            key: API key, sent as a Bearer token in the Authorization header.
            model_name: Name of the rerank model deployed in GPUStack.
            base_url: Root URL of the GPUStack server; ``/v1/rerank`` is appended.

        Raises:
            ValueError: If ``base_url`` is empty or None.
        """
        if not base_url:
            raise ValueError("url cannot be None")

        self.model_name = model_name
        self.base_url = str(URL(base_url) / "v1" / "rerank")
        self.headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """Score each text's relevance to ``query`` via the rerank endpoint.

        Args:
            query: The query string.
            texts: Candidate documents to rerank.

        Returns:
            A ``(rank, token_count)`` tuple: a numpy float array of relevance
            scores aligned with ``texts`` (zeros if the response carries no
            results), and an estimated token count of the input texts.

        Raises:
            ValueError: On an HTTP error response from the server.
        """
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts),
        }

        try:
            response = requests.post(
                self.base_url, json=payload, headers=self.headers
            )
            response.raise_for_status()
            response_json = response.json()

            rank = np.zeros(len(texts), dtype=float)
            if "results" not in response_json:
                return rank, 0

            # The endpoint does not report token usage; estimate from inputs.
            token_count = 0
            for t in texts:
                token_count += num_tokens_from_string(t)

            for result in response_json["results"]:
                rank[result["index"]] = result["relevance_score"]

            return (
                rank,
                token_count,
            )

        # BUG FIX: the request is made with `requests`, so a failed
        # raise_for_status() raises requests.exceptions.HTTPError — the
        # original `except httpx.HTTPStatusError` could never match and HTTP
        # errors escaped as the wrong exception type instead of ValueError.
        except requests.exceptions.HTTPError as e:
            raise ValueError(f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user