Fix: token count exceeds limit (#10046)

### What problem does this PR solve?

Fixes text input exceeding the token limit when using SiliconFlow's embedding models BAAI/bge-large-zh-v1.5 and BAAI/bge-large-en-v1.5 by truncating each text before it is sent.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-02-05 18:15:06 +08:00 · 2025-09-11 12:02:12 +08:00
parent 179091b1a4
commit 3d39b96c6f
1 changed files with 5 additions and 1 deletions
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -751,6 +751,10 @@ class SILICONFLOWEmbed(Base):
        token_count = 0
        for i in range(0, len(texts), batch_size):
            texts_batch = texts[i : i + batch_size]
+            if self.model_name in ["BAAI/bge-large-zh-v1.5", "BAAI/bge-large-en-v1.5"]:
+                # limit 512, 340 is almost safe
+                texts_batch = [" " if not text.strip() else truncate(text, 340) for text in texts_batch]
+            else:
                texts_batch = [" " if not text.strip() else text for text in texts_batch]

            payload = {