Fix: Azure OpenAI retry (#10213)
### What problem does this PR solve?

Currently, Azure OpenAI returns quota-limit responses with a one-minute retry window when the chat API is used. This change is needed in order to be able to process almost any document using models deployed in Azure Foundry.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
```diff
@@ -193,21 +193,30 @@ class Base(ABC):
                 return ans + LENGTH_NOTIFICATION_CN
             return ans + LENGTH_NOTIFICATION_EN
 
-    def _exceptions(self, e, attempt):
+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+        }
+
+    def _should_retry(self, error_code: str) -> bool:
+        return error_code in self._retryable_errors
+
+    def _exceptions(self, e, attempt) -> str | None:
         logging.exception("OpenAI chat_with_tools")
         # Classify the error
         error_code = self._classify_error(e)
         if attempt == self.max_retries:
             error_code = LLMErrorCode.ERROR_MAX_RETRIES
 
-        # Check if it's a rate limit error or server error and not the last attempt
-        should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
-        if not should_retry:
-            return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
-
-        delay = self._get_delay()
-        logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
-        time.sleep(delay)
+        if self._should_retry(error_code):
+            delay = self._get_delay()
+            logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
+            time.sleep(delay)
+            return None
+
+        return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
 
     def _verbose_tool_use(self, name, args, res):
         return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
```
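For context, `_exceptions` is the retry policy consulted by the attempt loop inside each chat call: returning `None` tells the caller to try again (the method has already slept for the backoff delay), while returning a string is the final error message. That loop is not part of this diff; the sketch below shows how such a loop could consume `_exceptions` — the `call_with_retry` helper and its shape are assumptions for illustration, not ragflow code.

```python
def call_with_retry(model, run_once):
    """Hypothetical attempt loop consuming _exceptions (assumed, not in this diff)."""
    for attempt in range(model.max_retries + 1):
        try:
            return run_once()  # one API call attempt
        except Exception as e:
            # None -> _exceptions already slept for the backoff; loop and retry.
            # A string -> retries are exhausted or the error is fatal; report it.
            msg = model._exceptions(e, attempt)
            if msg is not None:
                return msg
```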
```diff
@@ -536,6 +545,14 @@ class AzureChat(Base):
         self.client = AzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
         self.model_name = model_name
 
+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+            LLMErrorCode.ERROR_QUOTA,
+        }
+
 
 class BaiChuanChat(Base):
     _FACTORY_NAME = "BaiChuan"
```
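This override is the heart of the fix: `_should_retry` on the base class reads `self._retryable_errors`, so `AzureChat` widens the retryable set to include quota errors without touching the retry loop itself. A minimal, self-contained sketch of the pattern — the `LLMErrorCode` values are simplified stand-ins, and the `super()` union is an equivalent formulation of the full set the PR re-lists in the override:

```python
class LLMErrorCode:
    # Simplified stand-ins for ragflow's error-code constants.
    ERROR_RATE_LIMIT = "RATE_LIMIT"
    ERROR_SERVER = "SERVER_ERROR"
    ERROR_QUOTA = "QUOTA_EXCEEDED"


class Base:
    @property
    def _retryable_errors(self) -> set[str]:
        # Default policy: only transient rate-limit and server errors.
        return {LLMErrorCode.ERROR_RATE_LIMIT, LLMErrorCode.ERROR_SERVER}

    def _should_retry(self, error_code: str) -> bool:
        return error_code in self._retryable_errors


class AzureChat(Base):
    @property
    def _retryable_errors(self) -> set[str]:
        # Azure's one-minute quota responses clear on their own, so retry them too.
        return super()._retryable_errors | {LLMErrorCode.ERROR_QUOTA}


assert not Base()._should_retry(LLMErrorCode.ERROR_QUOTA)   # base gives up
assert AzureChat()._should_retry(LLMErrorCode.ERROR_QUOTA)  # Azure retries
```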
The same refactor is applied to `LiteLLMBase`:

```diff
@@ -1424,21 +1441,30 @@ class LiteLLMBase(ABC):
                 return ans + LENGTH_NOTIFICATION_CN
             return ans + LENGTH_NOTIFICATION_EN
 
-    def _exceptions(self, e, attempt):
+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+        }
+
+    def _should_retry(self, error_code: str) -> bool:
+        return error_code in self._retryable_errors
+
+    def _exceptions(self, e, attempt) -> str | None:
         logging.exception("OpenAI chat_with_tools")
         # Classify the error
         error_code = self._classify_error(e)
         if attempt == self.max_retries:
             error_code = LLMErrorCode.ERROR_MAX_RETRIES
 
-        # Check if it's a rate limit error or server error and not the last attempt
-        should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
-        if not should_retry:
-            return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
-
-        delay = self._get_delay()
-        logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
-        time.sleep(delay)
+        if self._should_retry(error_code):
+            delay = self._get_delay()
+            logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
+            time.sleep(delay)
+            return None
+
+        return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
 
     def _verbose_tool_use(self, name, args, res):
         return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
```