From 5d167cd77275b670bdf72b99d9bf3136814339a4 Mon Sep 17 00:00:00 2001
From: buua436 <66937541+buua436@users.noreply.github.com>
Date: Fri, 10 Oct 2025 16:38:04 +0800
Subject: [PATCH] feat: support qwq reasoning models with non-stream output
 (#10468)

### What problem does this PR solve?

issue: [#6193](https://github.com/infiniflow/ragflow/issues/6193)
change: support QwQ reasoning models with non-stream output by draining the streaming interface internally and returning the accumulated answer.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 rag/llm/chat_model.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 81e1a3459..dd088c83b 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -142,6 +142,22 @@ class Base(ABC):

     def _chat(self, history, gen_conf, **kwargs):
         logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
+        if self.model_name.lower().find("qwq") >= 0:
+            logging.info(f"[INFO] {self.model_name} detected as reasoning model, using _chat_streamly")
+
+            final_ans = ""
+            tol_token = 0
+            for delta, tol in self._chat_streamly(history, gen_conf, with_reasoning=False, **kwargs):
+                if delta.startswith("<think>") or delta.endswith("</think>"):
+                    continue
+                final_ans += delta
+                tol_token = tol
+
+            if len(final_ans.strip()) == 0:
+                final_ans = "**ERROR**: Empty response from reasoning model"
+
+            return final_ans.strip(), tol_token
+
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
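
For reviewers: the sketch below isolates the tag-filtering behavior the new branch implements. It is a minimal, self-contained approximation, not ragflow code: `fake_stream` is a hypothetical stand-in for `Base._chat_streamly(..., with_reasoning=False)`, assumed to yield `(delta, total_tokens)` pairs in which the reasoning text is already suppressed upstream but the `<think>`/`</think>` marker chunks may still come through.

```python
# Hypothetical stand-in for Base._chat_streamly(..., with_reasoning=False):
# yields (delta, total_tokens) pairs. The reasoning text itself is assumed
# to be suppressed upstream; only the tag chunks and the answer arrive.
def fake_stream():
    yield "<think>", 1   # stray opening tag from the reasoning model
    yield "</think>", 2  # stray closing tag
    yield "The final ", 6
    yield "answer.", 9

final_ans, tol_token = "", 0
for delta, tol in fake_stream():
    # Same guard as the patch: drop chunks that open or close a <think> block.
    if delta.startswith("<think>") or delta.endswith("</think>"):
        continue
    final_ans += delta
    tol_token = tol

# Mirror the patch's empty-response check: fall back to an error marker
# when every chunk was filtered out.
if not final_ans.strip():
    final_ans = "**ERROR**: Empty response from reasoning model"

print(final_ans.strip(), tol_token)  # -> The final answer. 9
```

Note that the guard only drops the chunks carrying the tags themselves; the patch relies on `with_reasoning=False` to keep the reasoning text out of the stream.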