diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index 97bc487d7..e28f64704 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -348,7 +348,7 @@ def list_app():
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key and o.status == StatusEnum.VALID.value])
         status = {(o.llm_name + "@" + o.llm_factory) for o in objs if o.status == StatusEnum.VALID.value}
         llms = LLMService.get_all()
-        llms = [m.to_dict() for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted and (m.llm_name + "@" + m.fid) in status]
+        llms = [m.to_dict() for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted and (m.fid == 'Builtin' or (m.llm_name + "@" + m.fid) in status)]
         for m in llms:
             m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deployed
             if "tei-" in os.getenv("COMPOSE_PROFILES", "") and m["model_type"] == LLMType.EMBEDDING and m["fid"] == "Builtin" and m["llm_name"] == os.getenv("TEI_MODEL", ""):
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 796734384..3f4070af6 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -31,6 +31,7 @@ from common.log_utils import log_exception
 from common.token_utils import num_tokens_from_string, truncate
 from common import settings
 import logging
+import base64
 
 
 class Base(ABC):
@@ -377,6 +378,46 @@ class JinaEmbed(Base):
         return np.array(embds[0]), cnt
 
 
+class JinaMultiVecEmbed(Base):
+    _FACTORY_NAME = "Jina"
+
+    def __init__(self, key, model_name="jina-embeddings-v4", base_url="https://api.jina.ai/v1/embeddings"):
+        self.base_url = "https://api.jina.ai/v1/embeddings"
+        self.headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
+        self.model_name = model_name
+
+    def encode(self, texts: list[str | bytes], task="retrieval.passage"):
+        batch_size = 16
+        ress = []
+        token_count = 0
+        input = []
+        for text in texts:
+            if isinstance(text, str):
+                input.append({"text": text})
+            elif isinstance(text, bytes):
+                img_b64s = None
+                try:
+                    base64.b64decode(text, validate=True)
+                    img_b64s = text.decode('utf8')
+                except Exception:
+                    img_b64s = base64.b64encode(text).decode('utf8')
+                input.append({"image": img_b64s})  # base64 encoded image
+        for i in range(0, len(texts), batch_size):
+            data = {"model": self.model_name, "task": task, "truncate": True, "return_multivector": True, "input": input[i : i + batch_size]}
+            response = requests.post(self.base_url, headers=self.headers, json=data)
+            try:
+                res = response.json()
+                ress.extend([d["embeddings"] for d in res["data"]])
+                token_count += self.total_token_count(res)
+            except Exception as _e:
+                log_exception(_e, response)
+        return np.array(ress), token_count
+
+    def encode_queries(self, text):
+        embds, cnt = self.encode([text], task="retrieval.query")
+        return np.array(embds[0]), cnt
+
+
 class MistralEmbed(Base):
     _FACTORY_NAME = "Mistral"
 
diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py
index 03251e72c..46d2c4054 100644
--- a/rag/utils/infinity_conn.py
+++ b/rag/utils/infinity_conn.py
@@ -156,6 +156,7 @@ class InfinityConnection(DocStoreConnection):
             msg = f"Infinity {infinity_uri} is unhealthy in 120s."
             logger.error(msg)
             raise Exception(msg)
+        self.column_vec_patt = re.compile(r"q_(?P<vec_size>\d+)_vec")
         logger.info(f"Infinity {infinity_uri} is healthy.")
 
     def _migrate_db(self, inf_conn):
@@ -323,7 +324,8 @@ class InfinityConnection(DocStoreConnection):
             output.append(score_func)
         if PAGERANK_FLD not in output:
             output.append(PAGERANK_FLD)
-        output = [f for f in output if f != "_score"]
+        output = [f for f in output if f != "_score" and self.column_vec_patt.match(f) is None]
+        logger.info(f"infinity output fields: {output}")
         if limit <= 0:
             # ElasticSearch default limit is 10000
             limit = 10000
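
For reference, a minimal sketch of what the changed condition in `list_app()` does, using made-up model dicts in place of the real ORM rows: a model from the `Builtin` factory now survives the filter even when no tenant has configured it, while models from other factories still need a matching `llm_name@factory` entry in `status`.

```python
# Stand-ins for the LLM rows and tenant-derived sets in list_app();
# the model names here are illustrative only.
models = [
    {"llm_name": "bge-m3", "fid": "Builtin"},
    {"llm_name": "text-embedding-3-small", "fid": "OpenAI"},
]
status = set()    # no tenant has configured any model yet
weighted = set()

kept = [
    m for m in models
    if m["fid"] not in weighted
    and (m["fid"] == "Builtin" or (m["llm_name"] + "@" + m["fid"]) in status)
]
print(kept)  # [{'llm_name': 'bge-m3', 'fid': 'Builtin'}] -- previously this list was empty
```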
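A rough usage sketch for the new `JinaMultiVecEmbed` class; the API key, image path, and input strings are placeholders, and a valid Jina account plus network access are assumed. `encode` takes a mixed list of text strings and raw image bytes (raw bytes are base64-encoded before being sent), and `encode_queries` switches the task to `retrieval.query`.

```python
from rag.llm.embedding_model import JinaMultiVecEmbed

# Placeholder credentials and file path.
embedder = JinaMultiVecEmbed(key="YOUR_JINA_API_KEY", model_name="jina-embeddings-v4")

with open("page_snapshot.png", "rb") as f:
    image_bytes = f.read()

# Mixed text/image batch; multivector embeddings come back per input item.
vecs, tokens = embedder.encode(["a passage of text to index", image_bytes])

# Query-side embedding uses the retrieval.query task.
qvec, qtokens = embedder.encode_queries("what does the figure show?")
```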
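And a small self-contained sketch of the new `column_vec_patt` filter in `infinity_conn.py`: embedding columns named like `q_<dim>_vec` are dropped from the requested output fields so raw vectors are not fetched with every hit (the field names below are examples).

```python
import re

column_vec_patt = re.compile(r"q_(?P<vec_size>\d+)_vec")

# Example field list; q_1024_vec stands in for an embedding column of dimension 1024.
output = ["docnm_kwd", "content_with_weight", "q_1024_vec", "_score", "pagerank_fea"]
output = [f for f in output if f != "_score" and column_vec_patt.match(f) is None]
print(output)  # ['docnm_kwd', 'content_with_weight', 'pagerank_fea']
```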