Refa: automatic LLMs registration (#8651)

### What problem does this PR solve?

Support automatic LLMs registration.

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-07-03 19:05:31 +08:00
committed by GitHub
parent 3234a15aae
commit f8a6987f1e
7 changed files with 619 additions and 876 deletions

View File

@ -13,16 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import requests
from openai.lib.azure import AzureOpenAI
import io
from abc import ABC
from openai import OpenAI
import json
from rag.utils import num_tokens_from_string
import base64
import io
import json
import os
import re
from abc import ABC
import requests
from openai import OpenAI
from openai.lib.azure import AzureOpenAI
from rag.utils import num_tokens_from_string
class Base(ABC):
@ -30,11 +32,7 @@ class Base(ABC):
pass
def transcription(self, audio, **kwargs):
transcription = self.client.audio.transcriptions.create(
model=self.model_name,
file=audio,
response_format="text"
)
transcription = self.client.audio.transcriptions.create(model=self.model_name, file=audio, response_format="text")
return transcription.text.strip(), num_tokens_from_string(transcription.text.strip())
def audio2base64(self, audio):
@ -46,6 +44,8 @@ class Base(ABC):
class GPTSeq2txt(Base):
_FACTORY_NAME = "OpenAI"
def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1"):
if not base_url:
base_url = "https://api.openai.com/v1"
@ -54,31 +54,34 @@ class GPTSeq2txt(Base):
class QWenSeq2txt(Base):
_FACTORY_NAME = "Tongyi-Qianwen"
def __init__(self, key, model_name="paraformer-realtime-8k-v1", **kwargs):
import dashscope
dashscope.api_key = key
self.model_name = model_name
def transcription(self, audio, format):
from http import HTTPStatus
from dashscope.audio.asr import Recognition
recognition = Recognition(model=self.model_name,
format=format,
sample_rate=16000,
callback=None)
recognition = Recognition(model=self.model_name, format=format, sample_rate=16000, callback=None)
result = recognition.call(audio)
ans = ""
if result.status_code == HTTPStatus.OK:
for sentence in result.get_sentence():
ans += sentence.text.decode('utf-8') + '\n'
ans += sentence.text.decode("utf-8") + "\n"
return ans, num_tokens_from_string(ans)
return "**ERROR**: " + result.message, 0
class AzureSeq2txt(Base):
_FACTORY_NAME = "Azure-OpenAI"
def __init__(self, key, model_name, lang="Chinese", **kwargs):
self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
self.model_name = model_name
@ -86,43 +89,33 @@ class AzureSeq2txt(Base):
class XinferenceSeq2txt(Base):
_FACTORY_NAME = "Xinference"
def __init__(self, key, model_name="whisper-small", **kwargs):
self.base_url = kwargs.get('base_url', None)
self.base_url = kwargs.get("base_url", None)
self.model_name = model_name
self.key = key
def transcription(self, audio, language="zh", prompt=None, response_format="json", temperature=0.7):
if isinstance(audio, str):
audio_file = open(audio, 'rb')
audio_file = open(audio, "rb")
audio_data = audio_file.read()
audio_file_name = audio.split("/")[-1]
else:
audio_data = audio
audio_file_name = "audio.wav"
payload = {
"model": self.model_name,
"language": language,
"prompt": prompt,
"response_format": response_format,
"temperature": temperature
}
payload = {"model": self.model_name, "language": language, "prompt": prompt, "response_format": response_format, "temperature": temperature}
files = {
"file": (audio_file_name, audio_data, 'audio/wav')
}
files = {"file": (audio_file_name, audio_data, "audio/wav")}
try:
response = requests.post(
f"{self.base_url}/v1/audio/transcriptions",
files=files,
data=payload
)
response = requests.post(f"{self.base_url}/v1/audio/transcriptions", files=files, data=payload)
response.raise_for_status()
result = response.json()
if 'text' in result:
transcription_text = result['text'].strip()
if "text" in result:
transcription_text = result["text"].strip()
return transcription_text, num_tokens_from_string(transcription_text)
else:
return "**ERROR**: Failed to retrieve transcription.", 0
@ -132,11 +125,11 @@ class XinferenceSeq2txt(Base):
class TencentCloudSeq2txt(Base):
def __init__(
self, key, model_name="16k_zh", base_url="https://asr.tencentcloudapi.com"
):
from tencentcloud.common import credential
_FACTORY_NAME = "Tencent Cloud"
def __init__(self, key, model_name="16k_zh", base_url="https://asr.tencentcloudapi.com"):
from tencentcloud.asr.v20190614 import asr_client
from tencentcloud.common import credential
key = json.loads(key)
sid = key.get("tencent_cloud_sid", "")
@ -146,11 +139,12 @@ class TencentCloudSeq2txt(Base):
self.model_name = model_name
def transcription(self, audio, max_retries=60, retry_interval=5):
import time
from tencentcloud.asr.v20190614 import models
from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
TencentCloudSDKException,
)
from tencentcloud.asr.v20190614 import models
import time
b64 = self.audio2base64(audio)
try:
@ -174,9 +168,7 @@ class TencentCloudSeq2txt(Base):
while retries < max_retries:
resp = self.client.DescribeTaskStatus(req)
if resp.Data.StatusStr == "success":
text = re.sub(
r"\[\d+:\d+\.\d+,\d+:\d+\.\d+\]\s*", "", resp.Data.Result
).strip()
text = re.sub(r"\[\d+:\d+\.\d+,\d+:\d+\.\d+\]\s*", "", resp.Data.Result).strip()
return text, num_tokens_from_string(text)
elif resp.Data.StatusStr == "failed":
return (
@ -195,6 +187,8 @@ class TencentCloudSeq2txt(Base):
class GPUStackSeq2txt(Base):
_FACTORY_NAME = "GPUStack"
def __init__(self, key, model_name, base_url):
if not base_url:
raise ValueError("url cannot be None")
@ -206,8 +200,11 @@ class GPUStackSeq2txt(Base):
class GiteeSeq2txt(Base):
_FACTORY_NAME = "GiteeAI"
def __init__(self, key, model_name="whisper-1", base_url="https://ai.gitee.com/v1/"):
if not base_url:
base_url = "https://ai.gitee.com/v1/"
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name
self.model_name = model_name