mirror of https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Don't release full image (#10654)
### What problem does this PR solve?

- Introduced a GPU profile in `.env`
- Added `Dockerfile_tei`
- Fixed `datrie`
- Removed the `LIGHTEN` flag

### Type of change

- [x] Documentation Update
- [x] Refactoring
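With `LIGHTEN` gone, whether the builtin embedding model is wired up is decided at runtime from the active Docker Compose profile rather than at build time. A minimal sketch of that gate, as the diff below implements it (profile names such as `tei-gpu` are assumed for illustration; the code only checks for the `tei-` prefix):

```python
import os

def builtin_embedding_enabled() -> bool:
    # BuiltinEmbed only connects to the TEI service when a tei-* Compose
    # profile (e.g. "tei-gpu", an assumed name) appears in COMPOSE_PROFILES.
    return "tei-" in os.getenv("COMPOSE_PROFILES", "")

print(builtin_embedding_enabled())  # False unless a tei-* profile is exported
```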
```diff
@@ -14,9 +14,7 @@
 # limitations under the License.
 #
 import json
 import logging
 import os
 import re
 import threading
 from abc import ABC
 from urllib.parse import urljoin
@@ -25,15 +23,14 @@ import dashscope
 import google.generativeai as genai
 import numpy as np
 import requests
-from huggingface_hub import snapshot_download
 from ollama import Client
 from openai import OpenAI
 from zhipuai import ZhipuAI
 
+from api import settings
 from api.utils.file_utils import get_home_cache_dir
 from api.utils.log_utils import log_exception
 from rag.utils import num_tokens_from_string, truncate
-from api import settings
-import logging
 
 
 class Base(ABC):
@@ -63,71 +60,42 @@ class Base(ABC):
         return 0
 
 
-class DefaultEmbedding(Base):
-    _FACTORY_NAME = "BAAI"
+class BuiltinEmbed(Base):
+    _FACTORY_NAME = "Builtin"
+    MAX_TOKENS = {"Qwen/Qwen3-Embedding-0.6B": 30000, "BAAI/bge-m3": 8000, "BAAI/bge-small-en-v1.5": 500}
     _model = None
     _model_name = ""
+    _max_tokens = 500
     _model_lock = threading.Lock()
 
     def __init__(self, key, model_name, **kwargs):
-        """
-        If you have trouble downloading HuggingFace models, -_^ this might help!!
-
-        For Linux:
-        export HF_ENDPOINT=https://hf-mirror.com
-
-        For Windows:
-        Good luck
-        ^_-
-
-        """
-        if not settings.LIGHTEN:
-            input_cuda_visible_devices = None
-            with DefaultEmbedding._model_lock:
-                import torch
-                from FlagEmbedding import FlagModel
-
-                if "CUDA_VISIBLE_DEVICES" in os.environ:
-                    input_cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"]
-                    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # handle some issues with multiple GPUs when initializing the model
-
-                if not DefaultEmbedding._model or model_name != DefaultEmbedding._model_name:
-                    try:
-                        DefaultEmbedding._model = FlagModel(
-                            os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z0-9]+/", "", model_name)),
-                            query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
-                            use_fp16=torch.cuda.is_available(),
-                        )
-                        DefaultEmbedding._model_name = model_name
-                    except Exception:
-                        model_dir = snapshot_download(
-                            repo_id="BAAI/bge-large-zh-v1.5", local_dir=os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z0-9]+/", "", model_name)), local_dir_use_symlinks=False
-                        )
-                        DefaultEmbedding._model = FlagModel(model_dir, query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:", use_fp16=torch.cuda.is_available())
-                    finally:
-                        if input_cuda_visible_devices:
-                            # restore CUDA_VISIBLE_DEVICES
-                            os.environ["CUDA_VISIBLE_DEVICES"] = input_cuda_visible_devices
-        self._model = DefaultEmbedding._model
-        self._model_name = DefaultEmbedding._model_name
+        logging.info(f"Initialize BuiltinEmbed according to settings.EMBEDDING_CFG: {settings.EMBEDDING_CFG}")
+        embedding_cfg = settings.EMBEDDING_CFG
+        if not BuiltinEmbed._model and "tei-" in os.getenv("COMPOSE_PROFILES", ""):
+            with BuiltinEmbed._model_lock:
+                BuiltinEmbed._model_name = settings.EMBEDDING_MDL
+                BuiltinEmbed._max_tokens = BuiltinEmbed.MAX_TOKENS.get(settings.EMBEDDING_MDL, 500)
+                BuiltinEmbed._model = HuggingFaceEmbed(embedding_cfg["api_key"], settings.EMBEDDING_MDL, base_url=embedding_cfg["base_url"])
+        self._model = BuiltinEmbed._model
+        self._model_name = BuiltinEmbed._model_name
+        self._max_tokens = BuiltinEmbed._max_tokens
 
     def encode(self, texts: list):
         batch_size = 16
-        texts = [truncate(t, 2048) for t in texts]
+        texts = [truncate(t, self._max_tokens) for t in texts]
         token_count = 0
-        for t in texts:
-            token_count += num_tokens_from_string(t)
         ress = None
         for i in range(0, len(texts), batch_size):
+            embeddings, token_count_delta = self._model.encode(texts[i : i + batch_size])
+            token_count += token_count_delta
             if ress is None:
-                ress = self._model.encode(texts[i : i + batch_size], convert_to_numpy=True)
+                ress = embeddings
             else:
-                ress = np.concatenate((ress, self._model.encode(texts[i : i + batch_size], convert_to_numpy=True)), axis=0)
+                ress = np.concatenate((ress, embeddings), axis=0)
         return ress, token_count
 
     def encode_queries(self, text: str):
-        token_count = num_tokens_from_string(text)
-        return self._model.encode_queries([text], convert_to_numpy=False)[0][0].cpu().numpy(), token_count
+        return self._model.encode_queries(text)
 
 
 class OpenAIEmbed(Base):
```
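The net effect of this hunk: `BuiltinEmbed` keeps the process-wide singleton pattern of the old `DefaultEmbedding`, but delegates to the TEI service through `HuggingFaceEmbed` instead of loading FlagEmbedding weights in-process. A condensed sketch of that pattern, with the `api.settings` values stubbed out as illustrative constants:

```python
import os
import threading

# Illustrative stand-ins for api.settings.EMBEDDING_CFG and EMBEDDING_MDL.
EMBEDDING_CFG = {"api_key": "", "base_url": "http://127.0.0.1:8080"}
EMBEDDING_MDL = "BAAI/bge-m3"


class TEIClientStub:
    """Stands in for HuggingFaceEmbed; only records where it would connect."""

    def __init__(self, key, model_name, base_url=None):
        self.base_url = base_url or "http://127.0.0.1:8080"
        self.model_name = model_name


class BuiltinEmbedSketch:
    MAX_TOKENS = {"Qwen/Qwen3-Embedding-0.6B": 30000, "BAAI/bge-m3": 8000, "BAAI/bge-small-en-v1.5": 500}
    _model = None  # one shared client per process, guarded by the lock
    _max_tokens = 500
    _model_lock = threading.Lock()

    def __init__(self):
        # Runtime gate replacing the old LIGHTEN build-time flag.
        if not BuiltinEmbedSketch._model and "tei-" in os.getenv("COMPOSE_PROFILES", ""):
            with BuiltinEmbedSketch._model_lock:
                BuiltinEmbedSketch._max_tokens = BuiltinEmbedSketch.MAX_TOKENS.get(EMBEDDING_MDL, 500)
                BuiltinEmbedSketch._model = TEIClientStub(EMBEDDING_CFG["api_key"], EMBEDDING_MDL, base_url=EMBEDDING_CFG["base_url"])
        self._model = BuiltinEmbedSketch._model
        self._max_tokens = BuiltinEmbedSketch._max_tokens


os.environ["COMPOSE_PROFILES"] = "tei-gpu"  # assumed profile name
print(BuiltinEmbedSketch()._model.base_url)  # -> http://127.0.0.1:8080
```

Note that `MAX_TOKENS` also feeds `encode()`, which now truncates each text to the per-model limit (`self._max_tokens`) instead of the previous hard-coded 2048.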
```diff
@@ -326,51 +294,6 @@ class OllamaEmbed(Base):
             log_exception(_e, res)
 
 
-class FastEmbed(DefaultEmbedding):
-    _FACTORY_NAME = "FastEmbed"
-
-    def __init__(
-        self,
-        key: str | None = None,
-        model_name: str = "BAAI/bge-small-en-v1.5",
-        cache_dir: str | None = None,
-        threads: int | None = None,
-        **kwargs,
-    ):
-        if not settings.LIGHTEN:
-            with FastEmbed._model_lock:
-                from fastembed import TextEmbedding
-
-                if not DefaultEmbedding._model or model_name != DefaultEmbedding._model_name:
-                    try:
-                        DefaultEmbedding._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)
-                        DefaultEmbedding._model_name = model_name
-                    except Exception:
-                        cache_dir = snapshot_download(
-                            repo_id="BAAI/bge-small-en-v1.5", local_dir=os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z0-9]+/", "", model_name)), local_dir_use_symlinks=False
-                        )
-                        DefaultEmbedding._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)
-        self._model = DefaultEmbedding._model
-        self._model_name = model_name
-
-    def encode(self, texts: list):
-        # Using the internal tokenizer to encode the texts and get the total
-        # number of tokens
-        encodings = self._model.model.tokenizer.encode_batch(texts)
-        total_tokens = sum(len(e) for e in encodings)
-
-        embeddings = [e.tolist() for e in self._model.embed(texts, batch_size=16)]
-
-        return np.array(embeddings), total_tokens
-
-    def encode_queries(self, text: str):
-        # Using the internal tokenizer to encode the texts and get the total
-        # number of tokens
-        encoding = self._model.model.tokenizer.encode(text)
-        embedding = next(self._model.query_embed(text))
-        return np.array(embedding), len(encoding.ids)
-
-
 class XinferenceEmbed(Base):
     _FACTORY_NAME = "Xinference"
 
@@ -407,14 +330,7 @@ class YoudaoEmbed(Base):
     _client = None
 
     def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
-        if not settings.LIGHTEN and not YoudaoEmbed._client:
-            from BCEmbedding import EmbeddingModel as qanthing
-
-            try:
-                logging.info("LOADING BCE...")
-                YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(get_home_cache_dir(), "bce-embedding-base_v1"))
-            except Exception:
-                YoudaoEmbed._client = qanthing(model_name_or_path=model_name.replace("maidalun1020", "InfiniFlow"))
+        pass
 
     def encode(self, texts: list):
         batch_size = 10
@@ -885,21 +801,18 @@ class HuggingFaceEmbed(Base):
         self.base_url = base_url or "http://127.0.0.1:8080"
 
     def encode(self, texts: list):
-        embeddings = []
-        for text in texts:
-            response = requests.post(f"{self.base_url}/embed", json={"inputs": text}, headers={"Content-Type": "application/json"})
-            if response.status_code == 200:
-                embedding = response.json()
-                embeddings.append(embedding[0])
-            else:
-                raise Exception(f"Error: {response.status_code} - {response.text}")
+        response = requests.post(f"{self.base_url}/embed", json={"inputs": texts}, headers={"Content-Type": "application/json"})
+        if response.status_code == 200:
+            embeddings = response.json()
+        else:
+            raise Exception(f"Error: {response.status_code} - {response.text}")
         return np.array(embeddings), sum([num_tokens_from_string(text) for text in texts])
 
-    def encode_queries(self, text):
+    def encode_queries(self, text: str):
         response = requests.post(f"{self.base_url}/embed", json={"inputs": text}, headers={"Content-Type": "application/json"})
         if response.status_code == 200:
-            embedding = response.json()
-            return np.array(embedding[0]), num_tokens_from_string(text)
+            embedding = response.json()[0]
+            return np.array(embedding), num_tokens_from_string(text)
         else:
             raise Exception(f"Error: {response.status_code} - {response.text}")
```
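`encode` now posts the whole batch in a single request instead of one HTTP call per text, and `encode_queries` unwraps the first vector from the same endpoint. A minimal round trip against a TEI-style `/embed` endpoint looks roughly like this (a sketch; it assumes a text-embeddings-inference server is actually listening on the class's default `base_url`):

```python
import numpy as np
import requests

base_url = "http://127.0.0.1:8080"  # HuggingFaceEmbed's default
texts = ["what is RAG?", "retrieval-augmented generation"]

# One POST for the whole batch; the server returns a JSON list of vectors.
response = requests.post(f"{base_url}/embed", json={"inputs": texts}, headers={"Content-Type": "application/json"})
response.raise_for_status()
vectors = np.array(response.json())
print(vectors.shape)  # (len(texts), embedding_dim)
```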