From da82566304096d42f3c8008e1700328786d0494e Mon Sep 17 00:00:00 2001
From: Billy Bao
Date: Tue, 23 Sep 2025 09:34:30 +0800
Subject: [PATCH] Fix: resolve hash collisions by switching to UUID & correct
 logic for always-true statements; Feat: update GPT API integration; Feat:
 support qianwen-deepresearch (#10208)

### What problem does this PR solve?

Fix: resolve hash collisions by switching to UUID and correct an always-true deduplication check. Resolves #10165
Feat: update GPT API integration. Resolves #10204
Feat: support qianwen-deepresearch. Resolves #10163

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
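For reference, a minimal, self-contained sketch of the corrected deduplication flow. The `r` dict and sample chunk ids below are hypothetical stand-ins for `self.retrieval[-1]` and the output of `chunks_format(...)` in `agent/canvas.py`; the patch assumes `uuid` is already imported at the top of that module.

```python
import uuid

# Hypothetical stand-ins for self.retrieval[-1] and chunks_format(...) output.
r = {"chunks": {}, "doc_aggs": []}
chunks = [{"id": "doc1#chunk0"}, {"id": "doc1#chunk0"}, {"id": "doc2#chunk3"}]

for ck in chunks:
    # uuid5 is deterministic per chunk id, so equal ids map to the same key,
    # while distinct ids can no longer collide the way the modulo-bucketed
    # hash_str2int(ck["id"], 100) could.
    cid = str(uuid.uuid5(uuid.NAMESPACE_DNS, ck["id"]))
    # The old guard `if cid not in r:` tested r's top-level keys ("chunks",
    # "doc_aggs", ...), so it was always true; check the chunks dict instead.
    if cid not in r["chunks"]:
        r["chunks"][cid] = ck

assert len(r["chunks"]) == 2  # the duplicated chunk id is stored only once
```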
---
 agent/canvas.py         |  4 ++--
 api/apps/sdk/session.py |  2 +-
 conf/llm_factories.json |  7 +++++++
 rag/llm/chat_model.py   | 23 +++++++++++-----------
 rag/llm/cv_model.py     |  9 +++++----
 5 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/agent/canvas.py b/agent/canvas.py
index ffa67c73d..cc6080e07 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -490,7 +490,7 @@ class Canvas(Graph):
         r = self.retrieval[-1]
 
         for ck in chunks_format({"chunks": chunks}):
-            cid = hash_str2int(ck["id"], 100)
-            if cid not in r:
+            cid = str(uuid.uuid5(uuid.NAMESPACE_DNS, ck["id"]))
+            if cid not in r["chunks"]:
                 r["chunks"][cid] = ck

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 8e4f5ee67..d9db95526 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -183,7 +183,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     stream = True
     reference = True
 
-    completion = client.chat.completions.create(
+    completion = client.responses.create(
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 686e97373..b9f8d3869 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -622,6 +622,13 @@
                     "tags": "SPEECH2TEXT,8k",
                     "max_tokens": 8000,
                     "model_type": "speech2text"
+                },
+                {
+                    "llm_name": "qianwen-deepresearch-30b-a3b-131k",
+                    "tags": "LLM,CHAT,1M,AGENT,DEEPRESEARCH",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
                 }
             ]
         },

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index a3fb357f3..5d92d01e7 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -143,7 +143,8 @@ class Base(ABC):
         logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
-        response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+
+        response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
 
         if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
             return "", 0
@@ -155,10 +156,12 @@ class Base(ABC):
     def _chat_streamly(self, history, gen_conf, **kwargs):
         logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4))
         reasoning_start = False
+
         if kwargs.get("stop") or "stop" in gen_conf:
-            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
+            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf, stop=kwargs.get("stop"))
         else:
-            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+
         for resp in response:
             if not resp.choices:
                 continue
@@ -254,7 +257,7 @@ class Base(ABC):
         try:
             for _ in range(self.max_rounds + 1):
                 logging.info(f"{self.tools=}")
-                response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
+                response = self.client.responses.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
                 tk_count += self.total_token_count(response)
                 if any([not response.choices, not response.choices[0].message]):
                     raise Exception(f"500 response structure error. Response: {response}")
@@ -339,7 +342,7 @@ class Base(ABC):
             for _ in range(self.max_rounds + 1):
                 reasoning_start = False
                 logging.info(f"{tools=}")
-                response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
+                response = self.client.responses.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
                 final_tool_calls = {}
                 answer = ""
                 for resp in response:
@@ -402,7 +405,7 @@ class Base(ABC):
             logging.warning(f"Exceed max rounds: {self.max_rounds}")
             history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
 
-            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
             for resp in response:
                 if any([not resp.choices, not resp.choices[0].delta, not hasattr(resp.choices[0].delta, "content")]):
                     raise Exception("500 response structure error.")
@@ -556,7 +559,7 @@ class BaiChuanChat(Base):
         }
 
     def _chat(self, history, gen_conf={}, **kwargs):
-        response = self.client.chat.completions.create(
+        response = self.client.responses.create(
            model=self.model_name,
            messages=history,
            extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -578,7 +581,7 @@ class BaiChuanChat(Base):
         ans = ""
         total_tokens = 0
         try:
-            response = self.client.chat.completions.create(
+            response = self.client.responses.create(
                 model=self.model_name,
                 messages=history,
                 extra_body={"tools": [{"type": "web_search", "web_search": {"enable": True, "search_mode": "performance_first"}}]},
@@ -648,7 +651,7 @@ class ZhipuChat(Base):
         tk_count = 0
         try:
             logging.info(json.dumps(history, ensure_ascii=False, indent=2))
-            response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
+            response = self.client.responses.create(model=self.model_name, messages=history, stream=True, **gen_conf)
             for resp in response:
                 if not resp.choices[0].delta.content:
                     continue
@@ -1361,7 +1364,7 @@ class LiteLLMBase(ABC):
                 drop_params=True,
                 timeout=self.timeout,
             )
-            # response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
+            # response = self.client.responses.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
 
             if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
                 return "", 0

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index bbb81f572..e9ae73770 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -75,7 +75,7 @@ class Base(ABC):
 
     def chat(self, system, history, gen_conf, images=[], **kwargs):
         try:
-            response = self.client.chat.completions.create(
+            response = self.client.responses.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images)
             )
@@ -87,7 +87,7 @@ class Base(ABC):
         ans = ""
         tk_count = 0
         try:
-            response = self.client.chat.completions.create(
+            response = self.client.responses.create(
                 model=self.model_name,
                 messages=self._form_history(system, history, images),
                 stream=True
@@ -174,7 +174,8 @@ class GptV4(Base):
 
     def describe(self, image):
         b64 = self.image2base64(image)
-        res = self.client.chat.completions.create(
+        # GPT-5-style models are served through the responses.create API
+        res = self.client.responses.create(
             model=self.model_name,
             messages=self.prompt(b64),
         )
@@ -182,7 +183,7 @@ class GptV4(Base):
 
     def describe_with_prompt(self, image, prompt=None):
         b64 = self.image2base64(image)
-        res = self.client.chat.completions.create(
+        res = self.client.responses.create(
             model=self.model_name,
             messages=self.vision_llm_prompt(b64, prompt),
         )
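Reviewer note: the migrated calls keep the `messages=` keyword, but in the official OpenAI Python SDK the Responses API takes `input` and exposes `output_text` instead of a `choices` list. A minimal sketch of the expected call shape, assuming that SDK (the model name and prompt below are placeholders):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# The Responses API takes `input` (a string, or a list of role/content
# items) rather than the Chat Completions `messages` parameter.
resp = client.responses.create(
    model="gpt-5",  # placeholder model name for illustration
    input=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
)

# Aggregated text output; a Response object has no `choices` list.
print(resp.output_text)
```

OpenAI-compatible backends (e.g. the BaiChuan and Zhipu endpoints touched here) may only expose `/chat/completions`, so the keyword mismatch is worth verifying before merge.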