From 0a08fc7b073acf6316c729e166f86e2f3742faf5 Mon Sep 17 00:00:00 2001
From: Magicbook1108
Date: Thu, 5 Feb 2026 15:56:58 +0800
Subject: [PATCH] Fix: example code in session.py (#13004)

### What problem does this PR solve?

Fix the example code in session.py so that `reference` is retrieved correctly. #12950

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Levi
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Liu An
---
 api/apps/sdk/session.py                 | 31 +++++++++++++------
 docs/references/python_api_reference.md | 36 ++++++++++++++++-----
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index f4a6f5477..589521f0d 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -207,7 +207,12 @@ async def chat_completion_openai_like(tenant_id, chat_id):
 
     Alternatively, you can use Python's `OpenAI` client:
 
+    NOTE: Streaming via `client.chat.completions.create(stream=True, ...)` currently
+    does not return `reference`. The only way to obtain `reference` is non-stream
+    mode with `with_raw_response`.
+
     from openai import OpenAI
+    import json
 
     model = "model"
     client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
@@ -215,15 +220,14 @@ async def chat_completion_openai_like(tenant_id, chat_id):
 
     stream = True
     reference = True
 
-    completion = client.chat.completions.create(
+    request_kwargs = dict(
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
-        stream=stream,
         extra_body={
             "reference": reference,
             "reference_metadata": {
@@ -240,19 +244,24 @@ async def chat_completion_openai_like(tenant_id, chat_id):
                     }
                 ]
             }
-        }
+        },
     )
 
     if stream:
-        for chunk in completion:
-            print(chunk)
-            if reference and chunk.choices[0].finish_reason == "stop":
-                print(f"Reference:\n{chunk.choices[0].delta.reference}")
-                print(f"Final content:\n{chunk.choices[0].delta.final_content}")
+        completion = client.chat.completions.create(stream=True, **request_kwargs)
+        for chunk in completion:
+            print(chunk)
     else:
-        print(completion.choices[0].message.content)
-        if reference:
-            print(completion.choices[0].message.reference)
+        resp = client.chat.completions.with_raw_response.create(
+            stream=False, **request_kwargs
+        )
+        print("status:", resp.http_response.status_code)
+        raw_text = resp.http_response.text
+        print("raw:", raw_text)
+
+        data = json.loads(raw_text)
+        print("assistant:", data["choices"][0]["message"].get("content"))
+        print("reference:", data["choices"][0]["message"].get("reference"))
 
     """
     req = await get_request_json()
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index fcea3f833..c0eeee3b3 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -65,8 +65,12 @@ Whether to receive the response as a stream. Set this to `false` explicitly if y
 
 #### Examples
 
+:::caution NOTE
+Streaming via `client.chat.completions.create(stream=True, ...)` does not return `reference`, because `reference` is only included in the raw response payload in non-stream mode. To obtain `reference`, set `stream=False` and read the raw response via `with_raw_response`, as shown below.
+:::
 ```python
 from openai import OpenAI
+import json
 
 model = "model"
 client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
@@ -74,7 +78,7 @@ client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
 
 stream = True
 reference = True
 
-completion = client.chat.completions.create(
+request_kwargs = dict(
     model=model,
     messages=[
         {"role": "system", "content": "You are a helpful assistant."},
@@ -82,26 +86,40 @@ completion = client.chat.completions.create(
         {"role": "assistant", "content": "I am an AI assistant named..."},
         {"role": "user", "content": "Can you tell me how to install neovim"},
     ],
-    stream=stream,
     extra_body={
         "reference": reference,
         "reference_metadata": {
             "include": True,
             "fields": ["author", "year", "source"],
         },
-    }
+    },
 )
 
 if stream:
+    completion = client.chat.completions.create(stream=True, **request_kwargs)
     for chunk in completion:
         print(chunk)
-        if reference and chunk.choices[0].finish_reason == "stop":
-            print(f"Reference:\n{chunk.choices[0].delta.reference}")
-            print(f"Final content:\n{chunk.choices[0].delta.final_content}")
 else:
-    print(completion.choices[0].message.content)
-    if reference:
-        print(completion.choices[0].message.reference)
+    resp = client.chat.completions.with_raw_response.create(
+        stream=False, **request_kwargs
+    )
+    print("status:", resp.http_response.status_code)
+    raw_text = resp.http_response.text
+    print("raw:", raw_text)
+
+    data = json.loads(raw_text)
+    print("assistant:", data["choices"][0]["message"].get("content"))
+    print("reference:", data["choices"][0]["message"].get("reference"))
 ```
 
 When `extra_body.reference_metadata.include` is `true`, each reference chunk may include a `document_metadata` object in both streaming and non-streaming responses.
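+
+A minimal sketch of reading that metadata from the non-stream `data` above, assuming the reference payload keeps RAGFlow's usual `chunks` list (verify the key names against your actual response):
+
+```python
+# Hypothetical follow-up: print each cited chunk's document_metadata.
+# "chunks" is an assumed key in the reference payload; adjust if your schema differs.
+reference_payload = data["choices"][0]["message"].get("reference") or {}
+for chunk in reference_payload.get("chunks", []):
+    print(chunk.get("document_metadata"))
+```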