add support for Voyage AI (#2159)

### What problem does this PR solve?

#1853  #2138 add support for Voyage AI

### Type of change
- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
黄腾
2024-08-29 16:14:49 +08:00
committed by GitHub
parent 15b78bd894
commit 99993e5026
9 changed files with 141 additions and 5 deletions

View File

@ -45,7 +45,8 @@ EmbeddingModel = {
"Upstage": UpstageEmbed,
"SILICONFLOW": SILICONFLOWEmbed,
"Replicate": ReplicateEmbed,
"BaiduYiyan": BaiduYiyanEmbed
"BaiduYiyan": BaiduYiyanEmbed,
"Voyage AI": VoyageEmbed
}
@ -105,7 +106,7 @@ ChatModel = {
"Tencent Hunyuan": HunyuanChat,
"XunFei Spark": SparkChat,
"BaiduYiyan": BaiduYiyanChat,
"Anthropic": AnthropicChat
"Anthropic": AnthropicChat,
}
@ -120,7 +121,8 @@ RerankModel = {
"cohere": CoHereRerank,
"TogetherAI": TogetherAIRerank,
"SILICONFLOW": SILICONFLOWRerank,
"BaiduYiyan": BaiduYiyanRerank
"BaiduYiyan": BaiduYiyanRerank,
"Voyage AI": VoyageRerank
}

View File

@ -623,3 +623,24 @@ class BaiduYiyanEmbed(Base):
np.array([r["embedding"] for r in res["data"]]),
res["usage"]["total_tokens"],
)
class VoyageEmbed(Base):
    """Embedding model that delegates to a ``voyageai.Client``."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Voyage AI client.

        Args:
            key: API key handed to ``voyageai.Client``.
            model_name: Model identifier sent with every embed request.
            base_url: Accepted but not used by this class.
        """
        # Imported here rather than at module level, so ``voyageai`` is only
        # required when this class is actually instantiated.
        import voyageai

        self.client = voyageai.Client(api_key=key)
        self.model_name = model_name

    def encode(self, texts: list, batch_size=32):
        """Embed ``texts`` as documents, ``batch_size`` texts per request.

        Args:
            texts: Texts to embed.
            batch_size: Maximum number of texts sent in one ``embed`` call.
                Values below 1 are treated as 1.

        Returns:
            A ``(embeddings, total_tokens)`` tuple: the embeddings of all
            batches, in input order, as a NumPy array, and the sum of the
            ``total_tokens`` reported for each batch. An empty ``texts``
            yields an empty array and ``0`` without calling the client.
        """
        step = max(1, batch_size)
        vectors = []
        token_count = 0
        for start in range(0, len(texts), step):
            res = self.client.embed(
                texts=texts[start:start + step],
                model=self.model_name,
                input_type="document",
            )
            vectors.extend(res.embeddings)
            token_count += res.total_tokens
        return np.array(vectors), token_count

    def encode_queries(self, text):
        """Embed a query.

        Args:
            text: The query. A single string is wrapped in a one-element
                list before being sent; any other value is passed through
                unchanged.

        Returns:
            A ``(embeddings, total_tokens)`` tuple built from the client
            response: ``res.embeddings`` as a NumPy array and
            ``res.total_tokens``.
        """
        # NOTE(review): the client's ``texts`` argument is documented as a
        # list of strings, hence the wrapping - confirm against the pinned
        # voyageai version.
        queries = [text] if isinstance(text, str) else text
        res = self.client.embed(
            texts=queries, model=self.model_name, input_type="query"
        )
        return np.array(res.embeddings), res.total_tokens

View File

@ -311,3 +311,19 @@ class BaiduYiyanRerank(Base):
rank = np.array([d["relevance_score"] for d in res["results"]])
indexs = [d["index"] for d in res["results"]]
return rank[indexs], res["usage"]["total_tokens"]
class VoyageRerank(Base):
    """Reranker that delegates to a ``voyageai.Client``."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Voyage AI client.

        Args:
            key: API key handed to ``voyageai.Client``.
            model_name: Model identifier sent with every rerank request.
            base_url: Accepted but not used by this class.
        """
        # Imported here rather than at module level, so ``voyageai`` is only
        # required when this class is actually instantiated.
        import voyageai

        self.client = voyageai.Client(api_key=key)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """Score every text in ``texts`` against ``query``.

        Args:
            query: The query string.
            texts: Candidate documents to score.

        Returns:
            A ``(scores, total_tokens)`` tuple. ``scores[i]`` is the
            relevance score reported for ``texts[i]``; ``total_tokens`` is
            taken from the client response. An empty ``texts`` yields an
            empty array and ``0`` without calling the client.
        """
        scores = np.zeros(len(texts), dtype=float)
        if not texts:
            return scores, 0

        res = self.client.rerank(
            query=query, documents=texts, model=self.model_name, top_k=len(texts)
        )
        # Each result carries the position of its document in ``texts``.
        # Write the score to that position (scatter) so the output lines up
        # with the input regardless of the order the results come back in.
        for item in res.results:
            scores[item.index] = item.relevance_score
        return scores, res.total_tokens