From 10cbbb76f8c2a6ae76599746e9fdfded06905753 Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Tue, 23 Sep 2025 16:06:12 +0800
Subject: [PATCH] revert gpt5 integration (#10228)

### What problem does this PR solve?

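  Revert the GPT-5 integration: every call that was switched to `client.responses.create(...)` goes back to `client.chat.completions.create(...)`.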

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe):
  Revert: restore `chat.completions.create` in place of `responses.create`.
---
 api/apps/sdk/session.py |  2 +-
 rag/llm/chat_model.py   | 20 ++++++++++----------
 rag/llm/cv_model.py     |  9 ++++-----
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index dc15c32d9..10b6e9752 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -182,7 +182,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     stream = True
     reference = True
 
-    completion = client.responses.create(
+    completion = client.chat.completions.create(
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 428d8542d..d0b422215 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -144,7 +144,7 @@ class Base(ABC):
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
         
-        response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+        response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
 
         if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
             return "", 0
@@ -158,9 +158,9 @@ class Base(ABC):
         reasoning_start = False
         
         if kwargs.get("stop") or "stop" in gen_conf:
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
         else:
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
         
         for resp in response:
             if not resp.choices:
@@ -266,7 +266,7 @@ class Base(ABC):
             try:
                 for _ in range(self.max_rounds + 1):
                     logging.info(f"{self.tools=}")
-                    response = self.client.responses.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
+                    response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
                     tk_count += self.total_token_count(response)
                     if any([not response.choices, not response.choices[0].message]):
                         raise Exception(f"500 response structure error. Response: {response}")
@@ -351,7 +351,7 @@ class Base(ABC):
                 for _ in range(self.max_rounds + 1):
                     reasoning_start = False
                     logging.info(f"{tools=}")
-                    response = self.client.responses.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
+                    response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
                     final_tool_calls = {}
                     answer = ""
                     for resp in response:
@@ -414,7 +414,7 @@ class Base(ABC):
 
                 logging.warning(f"Exceed max rounds: {self.max_rounds}")
                 history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
-                response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+                response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
                 for resp in response:
                     if any([not resp.choices, not resp.choices[0].delta, not hasattr(resp.choices[0].delta, "content")]):
                         raise Exception("500 response structure error.")
@@ -576,7 +576,7 @@ class BaiChuanChat(Base):
         }
 
     def _chat(self, history, gen_conf={}, **kwargs):
-        response = self.client.responses.create(
+        response = self.client.chat.completions.create(
             model=self.model_name,
             messages=history,
             extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -598,7 +598,7 @@ class BaiChuanChat(Base):
         ans = ""
         total_tokens = 0
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=history,
                 extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -668,7 +668,7 @@ class ZhipuChat(Base):
         tk_count = 0
         try:
             logging.info(json.dumps(history, ensure_ascii=False, indent=2))
-            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
             for resp in response:
                 if not resp.choices[0].delta.content:
                     continue
@@ -1381,7 +1381,7 @@ class LiteLLMBase(ABC):
             drop_params=True,
             timeout=self.timeout,
         )
-        # response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+        # response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
 
         if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
             return "", 0
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index c14b9d8d4..0a1559319 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -75,7 +75,7 @@ class Base(ABC):
 
     def chat(self, system, history, gen_conf, images=[], **kwargs):
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images)
             )
@@ -87,7 +87,7 @@ class Base(ABC):
         ans = ""
         tk_count = 0
         try:
-            response = self.client.responses.create(
+            response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images),
                 stream=True
@@ -174,8 +174,7 @@ class GptV4(Base):
 
     def describe(self, image):
         b64 = self.image2base64(image)
-        # Check if this is a GPT-5 model and use responses.create API
-        res = self.client.responses.create(
+        res = self.client.chat.completions.create(
             model=self.model_name,
             messages=self.prompt(b64),
         )
@@ -183,7 +182,7 @@ class GptV4(Base):
 
     def describe_with_prompt(self, image, prompt=None):
         b64 = self.image2base64(image)
-        res = self.client.responses.create(
+        res = self.client.chat.completions.create(
             model=self.model_name,
             messages=self.vision_llm_prompt(b64, prompt),
         )