Compare commits

..

4 Commits

Author SHA1 Message Date
38be53cf31 fix: prevent list index out of range in chat streaming (#10238)
### What problem does this PR solve?
Issue:
[Bug]: ERROR: list index out of range #10188
Change:
Fix a potential "list index out of range" error in chat response parsing
by adding explicit checks for empty choices.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-09-23 19:59:39 +08:00
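
A minimal sketch of the guard this commit describes — not the PR's exact code; it assumes the official OpenAI SDK and a placeholder model name:

```python
from openai import OpenAI

client = OpenAI()  # hypothetical setup; ragflow wraps this in its chat model classes
history = [{"role": "user", "content": "Hello"}]

stream = client.chat.completions.create(model="gpt-4o-mini", messages=history, stream=True)
for chunk in stream:
    # Some providers emit keep-alive or usage-only chunks whose `choices`
    # list is empty; indexing chunk.choices[0] there raises IndexError.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if delta is not None and delta.content:
        print(delta.content, end="")
```
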
65a06d62d8 Flow text processing bug (#10246)
### What problem does this PR solve?
@KevinHuSh 

Hello, my submission this morning did not fully resolve this issue.
After further investigation, I have decided to delete the two lines of
regular-expression processing that were added this morning.

```
removed 2 lines
modified 1 line
```
I mounted the updated code into the Docker Compose deployment and verified
that it no longer reports '\m' errors:

<img width="1050" height="447" alt="image"
src="https://github.com/user-attachments/assets/2aaf1b86-04ac-45ce-a2f1-052fed620e80"
/>

[my earlier pull request](https://github.com/infiniflow/ragflow/pull/10211)

<img width="1000" height="603" alt="image"
src="https://github.com/user-attachments/assets/fb3909ef-00ee-46c6-a26f-e64736777291"
/>

Thanks for your code review.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: mxc <mxc@example.com>
2025-09-23 19:59:13 +08:00
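
As background on why the '\m' error occurs at all: Python's `re.sub` parses backslash escapes in *string* replacements, so a replacement value containing `\m` raises `re.error: bad escape`. A callable replacement sidesteps escape processing entirely, which is what the diff further down does. A small illustrative sketch (hypothetical values):

```python
import re

script = "Dear {name}, your files are ready."  # hypothetical template
k, v = r"\{name\}", r"C:\my\path"              # value contains the '\m' sequence

try:
    re.sub(k, v, script)          # string replacement: escapes are parsed
except re.error as e:
    print(e)                      # bad escape \m at position 2

# A callable replacement inserts v literally, with no escape processing:
print(re.sub(k, lambda match: v, script))
# Dear C:\my\path, your files are ready.
```
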
10cbbb76f8 revert gpt5 integration (#10228)
### What problem does this PR solve?

Revert to chat.completions.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe):
  Revert to chat.completions.
2025-09-23 16:06:12 +08:00
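
For context on the revert: the two SDK surfaces take different request shapes, so chat-style calls cannot simply be pointed at `responses.create`. A hedged sketch of the difference, assuming the official OpenAI Python SDK and a placeholder model name:

```python
from openai import OpenAI

client = OpenAI()
history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello"},
]

# Chat Completions: accepts `messages` and answers via `choices`.
resp = client.chat.completions.create(model="gpt-4o-mini", messages=history)
print(resp.choices[0].message.content)

# The Responses API instead takes `input` and answers via `output_text`,
# so the reverted calls of the form responses.create(..., messages=history)
# could not work as written.
resp = client.responses.create(model="gpt-4o-mini", input="Hello")
print(resp.output_text)
```
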
1c84d1b562 Fix: azure OpenAI retry (#10213)
### What problem does this PR solve?

Currently, Azure OpenAI returns one-minute quota-limit responses when the
chat API is used. This change is needed to be able to process almost any
document using models deployed in Azure Foundry.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-09-23 12:19:28 +08:00
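
A rough sketch of the retry behavior this PR is after (hypothetical helper names; ragflow's real classifier is `_classify_error` and the delay comes from `_get_delay`):

```python
import random
import time


def is_quota_or_rate_limit(e: Exception) -> bool:
    # Simplified stand-in for ragflow's error classification.
    msg = str(e).lower()
    return "429" in msg or "quota" in msg or "rate limit" in msg


def with_retries(call, max_retries=3):
    for attempt in range(max_retries + 1):
        try:
            return call()
        except Exception as e:
            if attempt == max_retries or not is_quota_or_rate_limit(e):
                raise
            # Azure's per-minute quota resets quickly, so a short backoff
            # lets long document-processing runs continue instead of failing.
            time.sleep(min(2 ** attempt + random.random(), 60.0))
```
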
4 changed files with 59 additions and 36 deletions

View File

```diff
@@ -90,9 +90,7 @@ class StringTransform(Message, ABC):
         for k,v in kwargs.items():
             if not v:
                 v = ""
-            k = re.sub(r'\\m', 'm', k)
-            v = re.sub(r'\\m', 'm', v)
-            script = re.sub(k, v, script)
+            script = re.sub(k, lambda match: v, script)
         self.set_output("result", script)
```

View File

```diff
@@ -182,7 +182,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     stream = True
     reference = True
-    completion = client.responses.create(
+    completion = client.chat.completions.create(
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
```

View File

```diff
@@ -144,9 +144,9 @@ class Base(ABC):
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
-        response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
-        if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
+        response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+        if (not response.choices or not response.choices[0].message or not response.choices[0].message.content):
             return "", 0
         ans = response.choices[0].message.content.strip()
         if response.choices[0].finish_reason == "length":
@@ -158,9 +158,9 @@ class Base(ABC):
         reasoning_start = False
         if kwargs.get("stop") or "stop" in gen_conf:
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
         else:
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
         for resp in response:
             if not resp.choices:
```

```diff
@@ -193,21 +193,30 @@ class Base(ABC):
             return ans + LENGTH_NOTIFICATION_CN
         return ans + LENGTH_NOTIFICATION_EN

-    def _exceptions(self, e, attempt):
+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+        }
+
+    def _should_retry(self, error_code: str) -> bool:
+        return error_code in self._retryable_errors
+
+    def _exceptions(self, e, attempt) -> str | None:
         logging.exception("OpenAI chat_with_tools")
         # Classify the error
         error_code = self._classify_error(e)
         if attempt == self.max_retries:
             error_code = LLMErrorCode.ERROR_MAX_RETRIES
-        # Check if it's a rate limit error or server error and not the last attempt
-        should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
-        if not should_retry:
-            return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
-        delay = self._get_delay()
-        logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
-        time.sleep(delay)
-        return None
+        if self._should_retry(error_code):
+            delay = self._get_delay()
+            logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
+            time.sleep(delay)
+            return None
+        return f"{ERROR_PREFIX}: {error_code} - {str(e)}"

     def _verbose_tool_use(self, name, args, res):
         return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
```

```diff
@@ -257,7 +266,7 @@ class Base(ABC):
         try:
             for _ in range(self.max_rounds + 1):
                 logging.info(f"{self.tools=}")
-                response = self.client.responses.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
+                response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
                 tk_count += self.total_token_count(response)
                 if any([not response.choices, not response.choices[0].message]):
                     raise Exception(f"500 response structure error. Response: {response}")
@@ -342,7 +351,7 @@ class Base(ABC):
             for _ in range(self.max_rounds + 1):
                 reasoning_start = False
                 logging.info(f"{tools=}")
-                response = self.client.responses.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
+                response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
                 final_tool_calls = {}
                 answer = ""
                 for resp in response:
@@ -405,7 +414,7 @@ class Base(ABC):
             logging.warning(f"Exceed max rounds: {self.max_rounds}")
             history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
             for resp in response:
                 if any([not resp.choices, not resp.choices[0].delta, not hasattr(resp.choices[0].delta, "content")]):
                     raise Exception("500 response structure error.")
```

```diff
@@ -536,6 +545,14 @@ class AzureChat(Base):
         self.client = AzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
         self.model_name = model_name

+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+            LLMErrorCode.ERROR_QUOTA,
+        }
+

 class BaiChuanChat(Base):
     _FACTORY_NAME = "BaiChuan"
```

```diff
@@ -559,7 +576,7 @@ class BaiChuanChat(Base):
         }

     def _chat(self, history, gen_conf={}, **kwargs):
-        response = self.client.responses.create(
+        response = self.client.chat.completions.create(
             model=self.model_name,
             messages=history,
             extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -581,7 +598,7 @@ class BaiChuanChat(Base):
         ans = ""
         total_tokens = 0
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=history,
                 extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -651,7 +668,7 @@ class ZhipuChat(Base):
         tk_count = 0
         try:
             logging.info(json.dumps(history, ensure_ascii=False, indent=2))
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
             for resp in response:
                 if not resp.choices[0].delta.content:
                     continue
```

```diff
@@ -1364,7 +1381,7 @@ class LiteLLMBase(ABC):
             drop_params=True,
             timeout=self.timeout,
         )
-        # response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+        # response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
         if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
             return "", 0
@@ -1424,21 +1441,30 @@ class LiteLLMBase(ABC):
             return ans + LENGTH_NOTIFICATION_CN
         return ans + LENGTH_NOTIFICATION_EN

-    def _exceptions(self, e, attempt):
+    @property
+    def _retryable_errors(self) -> set[str]:
+        return {
+            LLMErrorCode.ERROR_RATE_LIMIT,
+            LLMErrorCode.ERROR_SERVER,
+        }
+
+    def _should_retry(self, error_code: str) -> bool:
+        return error_code in self._retryable_errors
+
+    def _exceptions(self, e, attempt) -> str | None:
         logging.exception("OpenAI chat_with_tools")
         # Classify the error
         error_code = self._classify_error(e)
         if attempt == self.max_retries:
             error_code = LLMErrorCode.ERROR_MAX_RETRIES
-        # Check if it's a rate limit error or server error and not the last attempt
-        should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
-        if not should_retry:
-            return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
-        delay = self._get_delay()
-        logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
-        time.sleep(delay)
-        return None
+        if self._should_retry(error_code):
+            delay = self._get_delay()
+            logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
+            time.sleep(delay)
+            return None
+        return f"{ERROR_PREFIX}: {error_code} - {str(e)}"

     def _verbose_tool_use(self, name, args, res):
         return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
```

View File

```diff
@@ -75,7 +75,7 @@ class Base(ABC):
     def chat(self, system, history, gen_conf, images=[], **kwargs):
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images)
             )
@@ -87,7 +87,7 @@ class Base(ABC):
         ans = ""
         tk_count = 0
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images),
                 stream=True
@@ -174,8 +174,7 @@ class GptV4(Base):
     def describe(self, image):
         b64 = self.image2base64(image)
-        # Check if this is a GPT-5 model and use responses.create API
-        res = self.client.responses.create(
+        res = self.client.chat.completions.create(
             model=self.model_name,
             messages=self.prompt(b64),
         )
@@ -183,7 +182,7 @@ class GptV4(Base):
     def describe_with_prompt(self, image, prompt=None):
         b64 = self.image2base64(image)
-        res = self.client.responses.create(
+        res = self.client.chat.completions.create(
             model=self.model_name,
             messages=self.vision_llm_prompt(b64, prompt),
         )
```
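
For reference, a minimal sketch of the message shape that helpers like `prompt(b64)` are expected to produce for `chat.completions.create`, using the documented `image_url` content part (placeholder file and model names):

```python
import base64

from openai import OpenAI

client = OpenAI()

with open("photo.jpg", "rb") as f:  # hypothetical local image
    b64 = base64.b64encode(f.read()).decode("utf-8")

resp = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            ],
        }
    ],
)
print(resp.choices[0].message.content)
```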