diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml
index d2a142a82..98a4103bd 100644
--- a/docker/docker-compose-base.yml
+++ b/docker/docker-compose-base.yml
@@ -203,7 +203,7 @@ services:
     env_file: .env
     networks:
       - ragflow
-    command: ["--model-id", "/data/${TEI_MODEL}"]
+    command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
     restart: on-failure
@@ -217,7 +217,7 @@ services:
     env_file: .env
     networks:
       - ragflow
-    command: ["--model-id", "/data/${TEI_MODEL}"]
+    command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
     deploy:
       resources:
         reservations:
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index b433815b3..10eba69d3 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -82,7 +82,7 @@ class BuiltinEmbed(Base):
     def encode(self, texts: list):
         batch_size = 16
-        texts = [truncate(t, self._max_tokens) for t in texts]
+        # TEI is able to auto truncate inputs according to https://github.com/huggingface/text-embeddings-inference.
         token_count = 0
         ress = None
         for i in range(0, len(texts), batch_size):
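
For context, here is a minimal sketch (not part of the patch) of the behavior this change relies on: with `--auto-truncate` passed to the TEI server, inputs longer than the model's maximum sequence length are clipped server-side, so the client no longer needs a `truncate()` pass before batching. The base URL and the standalone `embed` helper are assumptions for illustration; TEI's `/embed` route accepts a JSON body with an `inputs` field.

```python
import requests

TEI_URL = "http://localhost:8080"  # hypothetical address of the TEI container


def embed(texts: list[str]) -> list[list[float]]:
    """Embed texts in batches against a TEI server started with --auto-truncate."""
    batch_size = 16
    vectors: list[list[float]] = []
    for i in range(0, len(texts), batch_size):
        # No client-side truncate() here: the server clips over-length inputs
        # because it was launched with --auto-truncate.
        resp = requests.post(
            f"{TEI_URL}/embed",
            json={"inputs": texts[i : i + batch_size]},
        )
        resp.raise_for_status()
        vectors.extend(resp.json())
    return vectors
```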