From fa38aed01b8c4c72958973d4971690fa39983b49 Mon Sep 17 00:00:00 2001
From: Billy Bao
Date: Thu, 30 Oct 2025 19:00:53 +0800
Subject: [PATCH] Fix: the input length exceeds the context length (#10895)

### What problem does this PR solve?

Fix: the input length exceeds the context length #10750

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/apps/llm_app.py            |  2 +-
 api/db/services/llm_service.py | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index 19b25325f..3ba56325e 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -215,7 +215,7 @@ def add_llm():
         mdl = EmbeddingModel[factory](
             key=llm['api_key'],
             model_name=mdl_nm,
-            base_url=llm["api_base"])
+            base_url=llm["api_base"])
         try:
             arr, tc = mdl.encode(["Test if the api key is available"])
             if len(arr[0]) == 0:
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index 5be4175fb..0befc6f6e 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -16,6 +16,7 @@
 import inspect
 import logging
 import re
+from rag.utils import num_tokens_from_string
 from functools import partial
 from typing import Generator
 from api.db.db_models import LLM
@@ -79,9 +80,19 @@ class LLMBundle(LLM4Tenant):
 
     def encode(self, texts: list):
         if self.langfuse:
-            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
+            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
+
+        safe_texts = []
+        for text in texts:
+            token_size = num_tokens_from_string(text)
+            if token_size > self.max_length:
+                target_len = int(self.max_length * 0.95)
+                safe_texts.append(text[:target_len])
+            else:
+                safe_texts.append(text)
+
+        embeddings, used_tokens = self.mdl.encode(safe_texts)
 
-        embeddings, used_tokens = self.mdl.encode(texts)
         llm_name = getattr(self, "llm_name", None)
         if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
             logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
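
### Note: how the guard behaves

Below is a minimal, self-contained sketch of the truncation guard this patch adds to `LLMBundle.encode`. It is an illustration, not the project code: the whitespace tokenizer stands in for `rag.utils.num_tokens_from_string` (which is tiktoken-based), and `clamp_texts` / `MAX_LENGTH` are hypothetical names introduced here only for the demo. One subtlety worth noting: the guard measures the input in tokens but slices by characters, so the `0.95` factor acts as a safety margin rather than an exact token budget.

```python
# Sketch of the truncation guard added in this PR; assumptions flagged below.

def num_tokens_from_string(text: str) -> int:
    # ASSUMPTION: naive whitespace tokenizer standing in for the
    # tiktoken-based helper in rag.utils.
    return len(text.split())


MAX_LENGTH = 8  # ASSUMPTION: stand-in for the embedding model's context length


def clamp_texts(texts: list[str], max_length: int = MAX_LENGTH) -> list[str]:
    # Hypothetical helper mirroring the loop added to LLMBundle.encode.
    safe_texts = []
    for text in texts:
        if num_tokens_from_string(text) > max_length:
            # The slice is by characters, not tokens: a heuristic undershoot
            # that keeps the request below the model's context window.
            safe_texts.append(text[: int(max_length * 0.95)])
        else:
            safe_texts.append(text)
    return safe_texts


if __name__ == "__main__":
    long_text = "tok " * 20             # 20 tokens, over the stand-in limit
    short_text = "fits in the window"   # 4 tokens, passes through unchanged
    print(clamp_texts([long_text, short_text]))
    # -> ['tok tok', 'fits in the window']
```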