mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: the input length exceeds the context length (#10895)
### What problem does this PR solve?

Fix: the input length exceeds the context length (#10750).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -215,7 +215,7 @@ def add_llm():
         mdl = EmbeddingModel[factory](
             key=llm['api_key'],
             model_name=mdl_nm,
             base_url=llm["api_base"])
         try:
             arr, tc = mdl.encode(["Test if the api key is available"])
             if len(arr[0]) == 0:
|||||||
@@ -16,6 +16,7 @@
 import inspect
 import logging
 import re
+from rag.utils import num_tokens_from_string
 from functools import partial
 from typing import Generator
 from api.db.db_models import LLM
@@ -79,9 +80,19 @@ class LLMBundle(LLM4Tenant):
     def encode(self, texts: list):
         if self.langfuse:
             generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
 
-        embeddings, used_tokens = self.mdl.encode(texts)
+        safe_texts = []
+        for text in texts:
+            token_size = num_tokens_from_string(text)
+            if token_size > self.max_length:
+                target_len = int(self.max_length * 0.95)
+                safe_texts.append(text[:target_len])
+            else:
+                safe_texts.append(text)
+
+        embeddings, used_tokens = self.mdl.encode(safe_texts)
 
         llm_name = getattr(self, "llm_name", None)
         if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
             logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||||
|
|||||||
Reference in new issue · Block a user