mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: token num exceed (#10046)
### What problem does this PR solve?

Fixes text input exceeding the token number limit when using SiliconFlow's embedding models BAAI/bge-large-zh-v1.5 and BAAI/bge-large-en-v1.5, by truncating the text before it is sent to the model.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -751,7 +751,11 @@ class SILICONFLOWEmbed(Base):
         token_count = 0
         for i in range(0, len(texts), batch_size):
             texts_batch = texts[i : i + batch_size]
-            texts_batch = [" " if not text.strip() else text for text in texts_batch]
+            if self.model_name in ["BAAI/bge-large-zh-v1.5", "BAAI/bge-large-en-v1.5"]:
+                # limit 512, 340 is almost safe
+                texts_batch = [" " if not text.strip() else truncate(text, 340) for text in texts_batch]
+            else:
+                texts_batch = [" " if not text.strip() else text for text in texts_batch]
             payload = {
                 "model": self.model_name,
|||||||
Reference in New Issue
Block a user