Fix: the input length exceeds the context length (#10895)

### What problem does this PR solve?

Fix: the input length exceeds the context length (#10750).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-30 07:06:39 +08:00 · 2025-10-30 19:00:53 +08:00
parent ab52ffc9c0
commit fa38aed01b
2 changed files with 14 additions and 3 deletions
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -215,7 +215,7 @@ def add_llm():
        mdl = EmbeddingModel[factory](
            key=llm['api_key'],
            model_name=mdl_nm,
-            base_url=llm["api_base"])
+            base_url=llm["api_base"])   
        try:
            arr, tc = mdl.encode(["Test if the api key is available"])
            if len(arr[0]) == 0:
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -16,6 +16,7 @@
 import inspect
 import logging
 import re
+from rag.utils import num_tokens_from_string
 from functools import partial
 from typing import Generator
 from api.db.db_models import LLM
@@ -79,9 +80,19 @@ class LLMBundle(LLM4Tenant):

    def encode(self, texts: list):
        if self.langfuse:
-            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
+            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})        
+    
+        safe_texts = []
+        for text in texts:
+            token_size = num_tokens_from_string(text)
+            if token_size > self.max_length:
+                target_len = int(self.max_length * 0.95)
+                safe_texts.append(text[:target_len])
+            else:
+                safe_texts.append(text)
+                
+        embeddings, used_tokens = self.mdl.encode(safe_texts)

-        embeddings, used_tokens = self.mdl.encode(texts)
        llm_name = getattr(self, "llm_name", None)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
            logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))