Add support for Cohere (#1849)

### What problem does this PR solve?

Adds support for Cohere models, including a `CoHereEmbed` embedding class
that wraps the Cohere client's `embed` call for both documents and queries.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
This commit is contained in:
黄腾
2024-08-07 18:40:51 +08:00
committed by GitHub
parent 60428c4ad2
commit e34817c2a9
10 changed files with 260 additions and 6 deletions

View File

@ -522,4 +522,34 @@ class OpenAI_APIEmbed(OpenAIEmbed):
if base_url.split("/")[-1] != "v1":
base_url = os.path.join(base_url, "v1")
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name.split("___")[0]
self.model_name = model_name.split("___")[0]
class CoHereEmbed(Base):
    """Embedding backend that calls the Cohere ``embed`` endpoint."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Cohere client.

        Args:
            key: Cohere API key.
            model_name: Name of the embedding model sent with every request.
            base_url: Accepted but not used by this implementation.
        """
        # Local import: the cohere package is only imported when this class
        # is instantiated.
        from cohere import Client

        self.client = Client(api_key=key)
        self.model_name = model_name

    def _embed(self, texts, input_type):
        """Send one embed request for *texts* and return the raw response."""
        return self.client.embed(
            texts=texts,
            model=self.model_name,
            input_type=input_type,
            embedding_types=["float"],
        )

    def encode(self, texts: list, batch_size=32):
        """Embed a list of document texts.

        Texts are sent in slices of at most ``batch_size`` items so a large
        input never goes out as one oversized request.

        Args:
            texts: Strings to embed.
            batch_size: Maximum number of texts per request; values below 1
                are treated as 1.

        Returns:
            A tuple ``(embeddings, token_count)`` where ``embeddings`` is a
            numpy array with one row per input text and ``token_count`` is the
            sum of the billed input tokens reported by the API. An empty
            ``texts`` yields an empty array and 0 without any request.
        """
        step = max(int(batch_size), 1)
        vectors = []
        token_count = 0
        for start in range(0, len(texts), step):
            # Documents to be indexed use "search_document"; "search_query"
            # is reserved for the query side (see encode_queries).
            res = self._embed(texts[start:start + step], "search_document")
            vectors.extend(res.embeddings.float)
            token_count += int(res.meta.billed_units.input_tokens)
        return np.array(vectors), token_count

    def encode_queries(self, text):
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            A tuple ``(embeddings, token_count)``; ``embeddings`` is a numpy
            array holding the single query vector as its only row.
        """
        res = self._embed([text], "search_query")
        return np.array(list(res.embeddings.float)), int(
            res.meta.billed_units.input_tokens
        )