Feat: add splitter (#10161)

### What problem does this PR solve?


### Type of change
- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
This commit is contained in:
Kevin Hu
2025-09-19 10:15:19 +08:00
committed by GitHub
parent f9c7404bee
commit a1b947ffd6
81 changed files with 3083 additions and 799 deletions

View File

@ -37,6 +37,18 @@ class SupportedLiteLLMProvider(StrEnum):
TogetherAI = "TogetherAI"
Anthropic = "Anthropic"
Ollama = "Ollama"
Meituan = "Meituan"
CometAPI = "CometAPI"
SILICONFLOW = "SILICONFLOW"
OpenRouter = "OpenRouter"
StepFun = "StepFun"
PPIO = "PPIO"
PerfXCloud = "PerfXCloud"
Upstage = "Upstage"
NovitaAI = "NovitaAI"
Lingyi_AI = "01.AI"
GiteeAI = "GiteeAI"
AI_302 = "302.AI"
FACTORY_DEFAULT_BASE_URL = {
@ -44,6 +56,18 @@ FACTORY_DEFAULT_BASE_URL = {
SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1",
SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1",
SupportedLiteLLMProvider.Ollama: "",
SupportedLiteLLMProvider.Meituan: "https://api.longcat.chat/openai",
SupportedLiteLLMProvider.CometAPI: "https://api.cometapi.com/v1",
SupportedLiteLLMProvider.SILICONFLOW: "https://api.siliconflow.cn/v1",
SupportedLiteLLMProvider.OpenRouter: "https://openrouter.ai/api/v1",
SupportedLiteLLMProvider.StepFun: "https://api.stepfun.com/v1",
SupportedLiteLLMProvider.PPIO: "https://api.ppinfra.com/v3/openai",
SupportedLiteLLMProvider.PerfXCloud: "https://cloud.perfxlab.cn/v1",
SupportedLiteLLMProvider.Upstage: "https://api.upstage.ai/v1/solar",
SupportedLiteLLMProvider.NovitaAI: "https://api.novita.ai/v3/openai",
SupportedLiteLLMProvider.Lingyi_AI: "https://api.lingyiwanwu.com/v1",
SupportedLiteLLMProvider.GiteeAI: "https://ai.gitee.com/v1/",
SupportedLiteLLMProvider.AI_302: "https://api.302.ai/v1",
}
@ -62,6 +86,18 @@ LITELLM_PROVIDER_PREFIX = {
SupportedLiteLLMProvider.TogetherAI: "together_ai/",
SupportedLiteLLMProvider.Anthropic: "", # don't need a prefix
SupportedLiteLLMProvider.Ollama: "ollama_chat/",
SupportedLiteLLMProvider.Meituan: "openai/",
SupportedLiteLLMProvider.CometAPI: "openai/",
SupportedLiteLLMProvider.SILICONFLOW: "openai/",
SupportedLiteLLMProvider.OpenRouter: "openai/",
SupportedLiteLLMProvider.StepFun: "openai/",
SupportedLiteLLMProvider.PPIO: "openai/",
SupportedLiteLLMProvider.PerfXCloud: "openai/",
SupportedLiteLLMProvider.Upstage: "openai/",
SupportedLiteLLMProvider.NovitaAI: "openai/",
SupportedLiteLLMProvider.Lingyi_AI: "openai/",
SupportedLiteLLMProvider.GiteeAI: "openai/",
SupportedLiteLLMProvider.AI_302: "openai/",
}
# Reuse a ChatModel object that already exists in this module's globals;
# when none is present, start from an empty dict.
ChatModel = globals().get("ChatModel", {})

View File

@ -895,25 +895,6 @@ class MistralChat(Base):
yield total_tokens
## openrouter
class OpenRouterChat(Base):
    """Chat model class carrying the factory name "OpenRouter"."""

    _FACTORY_NAME = "OpenRouter"

    def __init__(self, key, model_name, base_url="https://openrouter.ai/api/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://openrouter.ai/api/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://openrouter.ai/api/v1"
        super().__init__(key, model_name, endpoint, **kwargs)
class StepFunChat(Base):
    """Chat model class carrying the factory name "StepFun"."""

    _FACTORY_NAME = "StepFun"

    def __init__(self, key, model_name, base_url="https://api.stepfun.com/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.stepfun.com/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url if base_url else "https://api.stepfun.com/v1"
        super().__init__(key, model_name, endpoint, **kwargs)
class LmStudioChat(Base):
_FACTORY_NAME = "LM-Studio"
@ -936,15 +917,6 @@ class OpenAI_APIChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
class PPIOChat(Base):
    """Chat model class carrying the factory name "PPIO"."""

    _FACTORY_NAME = "PPIO"

    def __init__(self, key, model_name, base_url="https://api.ppinfra.com/v3/openai", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.ppinfra.com/v3/openai``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://api.ppinfra.com/v3/openai"
        super().__init__(key, model_name, endpoint, **kwargs)
class LeptonAIChat(Base):
_FACTORY_NAME = "LeptonAI"
@ -954,60 +926,6 @@ class LeptonAIChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
class PerfXCloudChat(Base):
    """Chat model class carrying the factory name "PerfXCloud"."""

    _FACTORY_NAME = "PerfXCloud"

    def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://cloud.perfxlab.cn/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url if base_url else "https://cloud.perfxlab.cn/v1"
        super().__init__(key, model_name, endpoint, **kwargs)
class UpstageChat(Base):
    """Chat model class carrying the factory name "Upstage"."""

    _FACTORY_NAME = "Upstage"

    def __init__(self, key, model_name, base_url="https://api.upstage.ai/v1/solar", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.upstage.ai/v1/solar``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://api.upstage.ai/v1/solar"
        super().__init__(key, model_name, endpoint, **kwargs)
class NovitaAIChat(Base):
    """Chat model class carrying the factory name "NovitaAI"."""

    _FACTORY_NAME = "NovitaAI"

    def __init__(self, key, model_name, base_url="https://api.novita.ai/v3/openai", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.novita.ai/v3/openai``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url if base_url else "https://api.novita.ai/v3/openai"
        super().__init__(key, model_name, endpoint, **kwargs)
class SILICONFLOWChat(Base):
    """Chat model class carrying the factory name "SILICONFLOW"."""

    _FACTORY_NAME = "SILICONFLOW"

    def __init__(self, key, model_name, base_url="https://api.siliconflow.cn/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.siliconflow.cn/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://api.siliconflow.cn/v1"
        super().__init__(key, model_name, endpoint, **kwargs)
class YiChat(Base):
    """Chat model class carrying the factory name "01.AI"."""

    _FACTORY_NAME = "01.AI"

    def __init__(self, key, model_name, base_url="https://api.lingyiwanwu.com/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.lingyiwanwu.com/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url if base_url else "https://api.lingyiwanwu.com/v1"
        super().__init__(key, model_name, endpoint, **kwargs)
class GiteeChat(Base):
    """Chat model class carrying the factory name "GiteeAI"."""

    _FACTORY_NAME = "GiteeAI"

    def __init__(self, key, model_name, base_url="https://ai.gitee.com/v1/", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://ai.gitee.com/v1/``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://ai.gitee.com/v1/"
        super().__init__(key, model_name, endpoint, **kwargs)
class ReplicateChat(Base):
_FACTORY_NAME = "Replicate"
@ -1347,26 +1265,46 @@ class GPUStackChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
class Ai302Chat(Base):
_FACTORY_NAME = "302.AI"
class TokenPonyChat(Base):
_FACTORY_NAME = "TokenPony"
def __init__(self, key, model_name, base_url="https://api.302.ai/v1", **kwargs):
def __init__(self, key, model_name, base_url="https://ragflow.vip-api.tokenpony.cn/v1", **kwargs):
if not base_url:
base_url = "https://api.302.ai/v1"
super().__init__(key, model_name, base_url, **kwargs)
class MeituanChat(Base):
_FACTORY_NAME = "Meituan"
def __init__(self, key, model_name, base_url="https://api.longcat.chat/openai", **kwargs):
if not base_url:
base_url = "https://api.longcat.chat/openai"
super().__init__(key, model_name, base_url, **kwargs)
base_url = "https://ragflow.vip-api.tokenpony.cn/v1"
class LiteLLMBase(ABC):
_FACTORY_NAME = ["Tongyi-Qianwen", "Bedrock", "Moonshot", "xAI", "DeepInfra", "Groq", "Cohere", "Gemini", "DeepSeek", "NVIDIA", "TogetherAI", "Anthropic", "Ollama"]
_FACTORY_NAME = [
"Tongyi-Qianwen",
"Bedrock",
"Moonshot",
"xAI",
"DeepInfra",
"Groq",
"Cohere",
"Gemini",
"DeepSeek",
"NVIDIA",
"TogetherAI",
"Anthropic",
"Ollama",
"Meituan",
"CometAPI",
"SILICONFLOW",
"OpenRouter",
"StepFun",
"PPIO",
"PerfXCloud",
"Upstage",
"NovitaAI",
"01.AI",
"GiteeAI",
"302.AI",
]
# NOTE(review): these two statements execute as soon as the enclosing scope is
# evaluated, so LiteLLM debug mode is switched on as a side effect of loading
# this code rather than on request. This looks like a debugging leftover;
# LiteLLM's documentation reportedly warns that debug output can include
# credentials -- TODO confirm and gate behind an opt-in setting or remove.
import litellm
litellm._turn_on_debug()
def __init__(self, key, model_name, base_url=None, **kwargs):
self.timeout = int(os.environ.get("LM_TIMEOUT_SECONDS", 600))
@ -1374,7 +1312,7 @@ class LiteLLMBase(ABC):
self.prefix = LITELLM_PROVIDER_PREFIX.get(self.provider, "")
self.model_name = f"{self.prefix}{model_name}"
self.api_key = key
self.base_url = (base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")).rstrip('/')
self.base_url = (base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")).rstrip("/")
# Configure retry parameters
self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))

View File

@ -86,9 +86,10 @@ class DefaultEmbedding(Base):
with DefaultEmbedding._model_lock:
import torch
from FlagEmbedding import FlagModel
if "CUDA_VISIBLE_DEVICES" in os.environ:
input_cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # handle some issues with multiple GPUs when initializing the model
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # handle some issues with multiple GPUs when initializing the model
if not DefaultEmbedding._model or model_name != DefaultEmbedding._model_name:
try:
@ -145,7 +146,7 @@ class OpenAIEmbed(Base):
ress = []
total_tokens = 0
for i in range(0, len(texts), batch_size):
res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name, encoding_format="float")
res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name, encoding_format="float", extra_body={"drop_params": True})
try:
ress.extend([d.embedding for d in res.data])
total_tokens += self.total_token_count(res)
@ -154,7 +155,7 @@ class OpenAIEmbed(Base):
return np.array(ress), total_tokens
def encode_queries(self, text):
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float")
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
return np.array(res.data[0].embedding), self.total_token_count(res)
@ -472,6 +473,7 @@ class MistralEmbed(Base):
def encode(self, texts: list):
import time
import random
texts = [truncate(t, 8196) for t in texts]
batch_size = 16
ress = []
@ -495,6 +497,7 @@ class MistralEmbed(Base):
def encode_queries(self, text):
import time
import random
retry_max = 5
while retry_max > 0:
try:
@ -659,7 +662,7 @@ class OpenAI_APIEmbed(OpenAIEmbed):
def __init__(self, key, model_name, base_url):
if not base_url:
raise ValueError("url cannot be None")
base_url = urljoin(base_url, "v1")
#base_url = urljoin(base_url, "v1")
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name.split("___")[0]
@ -751,7 +754,11 @@ class SILICONFLOWEmbed(Base):
token_count = 0
for i in range(0, len(texts), batch_size):
texts_batch = texts[i : i + batch_size]
texts_batch = [" " if not text.strip() else text for text in texts_batch]
if self.model_name in ["BAAI/bge-large-zh-v1.5", "BAAI/bge-large-en-v1.5"]:
# limit 512, 340 is almost safe
texts_batch = [" " if not text.strip() else truncate(text, 340) for text in texts_batch]
else:
texts_batch = [" " if not text.strip() else text for text in texts_batch]
payload = {
"model": self.model_name,
@ -938,6 +945,7 @@ class GiteeEmbed(SILICONFLOWEmbed):
base_url = "https://ai.gitee.com/v1/embeddings"
super().__init__(key, model_name, base_url)
class DeepInfraEmbed(OpenAIEmbed):
_FACTORY_NAME = "DeepInfra"
@ -954,3 +962,12 @@ class Ai302Embed(Base):
if not base_url:
base_url = "https://api.302.ai/v1/embeddings"
super().__init__(key, model_name, base_url)
class CometEmbed(OpenAIEmbed):
    """Embedding class carrying the factory name "CometAPI"."""

    _FACTORY_NAME = "CometAPI"

    def __init__(self, key, model_name, base_url="https://api.cometapi.com/v1"):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.cometapi.com/v1``.
        """
        endpoint = base_url or "https://api.cometapi.com/v1"
        super().__init__(key, model_name, endpoint)

View File

@ -218,7 +218,7 @@ class GPUStackSeq2txt(Base):
class GiteeSeq2txt(Base):
_FACTORY_NAME = "GiteeAI"
def __init__(self, key, model_name="whisper-1", base_url="https://ai.gitee.com/v1/"):
def __init__(self, key, model_name="whisper-1", base_url="https://ai.gitee.com/v1/", **kwargs):
if not base_url:
base_url = "https://ai.gitee.com/v1/"
self.client = OpenAI(api_key=key, base_url=base_url)
@ -234,3 +234,13 @@ class DeepInfraSeq2txt(Base):
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name
class CometSeq2txt(Base):
    """Speech-to-text class carrying the factory name "CometAPI"."""

    _FACTORY_NAME = "CometAPI"

    def __init__(self, key, model_name="whisper-1", base_url="https://api.cometapi.com/v1", **kwargs):
        """Create the ``OpenAI`` client and remember the model name.

        The parent constructor is not invoked here.

        Args:
            key: Passed to ``OpenAI`` as ``api_key``.
            model_name: Stored on the instance as ``model_name``.
            base_url: Endpoint handed to ``OpenAI``; a falsy value (``None``
                or ``""``) is replaced by ``https://api.cometapi.com/v1``.
            **kwargs: Accepted but not used by this constructor.
        """
        endpoint = base_url if base_url else "https://api.cometapi.com/v1"
        self.client = OpenAI(api_key=key, base_url=endpoint)
        self.model_name = model_name

View File

@ -394,3 +394,11 @@ class DeepInfraTTS(OpenAITTS):
if not base_url:
base_url = "https://api.deepinfra.com/v1/openai"
super().__init__(key, model_name, base_url, **kwargs)
class CometAPITTS(OpenAITTS):
    """Text-to-speech class carrying the factory name "CometAPI"."""

    _FACTORY_NAME = "CometAPI"

    def __init__(self, key, model_name, base_url="https://api.cometapi.com/v1", **kwargs):
        """Resolve the endpoint, then defer to the parent constructor.

        Args:
            key: Forwarded unchanged as the first positional argument.
            model_name: Forwarded unchanged as the second positional argument.
            base_url: Endpoint to use; a falsy value (``None`` or ``""``) is
                replaced by ``https://api.cometapi.com/v1``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        endpoint = base_url or "https://api.cometapi.com/v1"
        super().__init__(key, model_name, endpoint, **kwargs)