Fix: error 429 API rate limit when building knowledge graph for all chat models and the Mistral embedding model (#9106)

### What problem does this PR solve?

Fixes the 429 (API rate limit) errors raised when building a knowledge graph, for all chat models and for the Mistral embedding model.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: 謝富祥
Date: 2025-07-30 11:37:49 +08:00
Committed by: GitHub
Parent: e26f37351d
Commit: 021e8b57ae
2 changed files with 29 additions and 12 deletions


```diff
@@ -73,7 +73,7 @@ class Base(ABC):
     def _get_delay(self):
         """Calculate retry delay time"""
-        return self.base_delay + random.uniform(10, 150)
+        return self.base_delay + random.uniform(60, 150)
 
     def _classify_error(self, error):
         """Classify error based on error message content"""
```


```diff
@@ -463,25 +463,42 @@ class MistralEmbed(Base):
         self.model_name = model_name
 
     def encode(self, texts: list):
+        import time
+        import random
         texts = [truncate(t, 8196) for t in texts]
         batch_size = 16
         ress = []
         token_count = 0
         for i in range(0, len(texts), batch_size):
-            res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
-            try:
-                ress.extend([d.embedding for d in res.data])
-                token_count += self.total_token_count(res)
-            except Exception as _e:
-                log_exception(_e, res)
+            retry_max = 5
+            while retry_max > 0:
+                try:
+                    res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
+                    ress.extend([d.embedding for d in res.data])
+                    token_count += self.total_token_count(res)
+                    break
+                except Exception as _e:
+                    if retry_max == 1:
+                        log_exception(_e)
+                    delay = random.uniform(20, 60)
+                    time.sleep(delay)
+                    retry_max -= 1
         return np.array(ress), token_count
 
     def encode_queries(self, text):
-        res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
-        try:
-            return np.array(res.data[0].embedding), self.total_token_count(res)
-        except Exception as _e:
-            log_exception(_e, res)
+        import time
+        import random
+        retry_max = 5
+        while retry_max > 0:
+            try:
+                res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
+                return np.array(res.data[0].embedding), self.total_token_count(res)
+            except Exception as _e:
+                if retry_max == 1:
+                    log_exception(_e)
+                delay = random.randint(20, 60)
+                time.sleep(delay)
+                retry_max -= 1
 
 
 class BedrockEmbed(Base):
```
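
Both retry paths cap at five attempts with a random 20–60 s pause between tries (`random.uniform` in `encode`, `random.randint` in `encode_queries`; equivalent in effect), and `log_exception` now fires only on the final attempt, so transient 429s retry silently instead of failing the batch outright. An illustrative call site; the constructor arguments shown are assumed, not part of this diff:

```python
# Hypothetical usage; MistralEmbed's constructor arguments are assumed.
embedder = MistralEmbed(key="sk-...", model_name="mistral-embed")
vectors, token_count = embedder.encode(["first chunk", "second chunk"])
```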