From 10cbbb76f8c2a6ae76599746e9fdfded06905753 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Tue, 23 Sep 2025 16:06:12 +0800 Subject: [PATCH] revert gpt5 integration (#10228) ### What problem does this PR solve? Revert back to chat.completions. ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [x] Other (please describe): Revert back to chat.completions. --- api/apps/sdk/session.py | 2 +- rag/llm/chat_model.py | 20 ++++++++++---------- rag/llm/cv_model.py | 9 ++++----- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index dc15c32d9..10b6e9752 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -182,7 +182,7 @@ def chat_completion_openai_like(tenant_id, chat_id): stream = True reference = True - completion = client.responses.create( + completion = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": "You are a helpful assistant."}, diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 428d8542d..d0b422215 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -144,7 +144,7 @@ class Base(ABC): if self.model_name.lower().find("qwen3") >= 0: kwargs["extra_body"] = {"enable_thinking": False} - response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs) + response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs) if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]): return "", 0 @@ -158,9 +158,9 @@ class Base(ABC): reasoning_start = False if kwargs.get("stop") or "stop" in gen_conf: - response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop")) + response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop")) else: - response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf) + response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf) for resp in response: if not resp.choices: @@ -266,7 +266,7 @@ class Base(ABC): try: for _ in range(self.max_rounds + 1): logging.info(f"{self.tools=}") - response = self.client.responses.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf) + response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf) tk_count += self.total_token_count(response) if any([not response.choices, not response.choices[0].message]): raise Exception(f"500 response structure error. Response: {response}") @@ -351,7 +351,7 @@ class Base(ABC): for _ in range(self.max_rounds + 1): reasoning_start = False logging.info(f"{tools=}") - response = self.client.responses.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf) + response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf) final_tool_calls = {} answer = "" for resp in response: @@ -414,7 +414,7 @@ class Base(ABC): logging.warning(f"Exceed max rounds: {self.max_rounds}") history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"}) - response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf) + response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf) for resp in response: if any([not resp.choices, not resp.choices[0].delta, not hasattr(resp.choices[0].delta, "content")]): raise Exception("500 response structure error.") @@ -576,7 +576,7 @@ class BaiChuanChat(Base): } def _chat(self, history, gen_conf={}, **kwargs): - response = self.client.responses.create( + response = self.client.chat.completions.create( model=self.model_name, messages=history, extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]}, @@ -598,7 +598,7 @@ class BaiChuanChat(Base): ans = "" total_tokens = 0 try: - response = self.client.responses.create( + response = self.client.chat.completions.create( model=self.model_name, messages=history, extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]}, @@ -668,7 +668,7 @@ class ZhipuChat(Base): tk_count = 0 try: logging.info(json.dumps(history, ensure_ascii=False, indent=2)) - response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf) + response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf) for resp in response: if not resp.choices[0].delta.content: continue @@ -1381,7 +1381,7 @@ class LiteLLMBase(ABC): drop_params=True, timeout=self.timeout, ) - # response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs) + # response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs) if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]): return "", 0 diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index c14b9d8d4..0a1559319 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -75,7 +75,7 @@ class Base(ABC): def chat(self, system, history, gen_conf, images=[], **kwargs): try: - response = self.client.responses.create( + response = self.client.chat.completions.create( model=self.model_name, messages=self._form_history(system, history, images) ) @@ -87,7 +87,7 @@ class Base(ABC): ans = "" tk_count = 0 try: - response = self.client.responses.create( + response = self.client.chat.completions.create( model=self.model_name, messages=self._form_history(system, history, images), stream=True @@ -174,8 +174,7 @@ class GptV4(Base): def describe(self, image): b64 = self.image2base64(image) - # Check if this is a GPT-5 model and use responses.create API - res = self.client.responses.create( + res = self.client.chat.completions.create( model=self.model_name, messages=self.prompt(b64), ) @@ -183,7 +182,7 @@ class GptV4(Base): def describe_with_prompt(self, image, prompt=None): b64 = self.image2base64(image) - res = self.client.responses.create( + res = self.client.chat.completions.create( model=self.model_name, messages=self.vision_llm_prompt(b64, prompt), )