From 0a08fc7b073acf6316c729e166f86e2f3742faf5 Mon Sep 17 00:00:00 2001
From: Magicbook1108
Date: Thu, 5 Feb 2026 15:56:58 +0800
Subject: [PATCH] Fix: example code in session.py (#13004)

### What problem does this PR solve?

Fix the example code in session.py so that `reference` is retrieved correctly. #12950

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Levi
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Liu An
---
 api/apps/sdk/session.py                 | 31 +++++++++++++------
 docs/references/python_api_reference.md | 36 ++++++++++++++++-----
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index f4a6f5477..589521f0d 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -207,7 +207,12 @@ async def chat_completion_openai_like(tenant_id, chat_id):
 
     Alternatively, you can use Python's `OpenAI` client:
 
+    NOTE: Streaming via `client.chat.completions.create(stream=True, ...)` currently
+    does not return `reference`. The only way to obtain `reference` is non-stream
+    mode with `with_raw_response`.
+
     from openai import OpenAI
+    import json
 
     model = "model"
     client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
@@ -215,15 +220,14 @@ async def chat_completion_openai_like(tenant_id, chat_id):
 
     stream = True
     reference = True
 
-    completion = client.chat.completions.create(
+    request_kwargs = dict(
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
-        stream=stream,
         extra_body={
             "reference": reference,
             "reference_metadata": {
@@ -240,19 +244,24 @@ async def chat_completion_openai_like(tenant_id, chat_id):
                     }
                 ]
             }
-        }
+        },
     )
 
     if stream:
-        for chunk in completion:
-            print(chunk)
-            if reference and chunk.choices[0].finish_reason == "stop":
-                print(f"Reference:\n{chunk.choices[0].delta.reference}")
-                print(f"Final content:\n{chunk.choices[0].delta.final_content}")
+        completion = client.chat.completions.create(stream=True, **request_kwargs)
+        for chunk in completion:
+            print(chunk)
     else:
-        print(completion.choices[0].message.content)
-        if reference:
-            print(completion.choices[0].message.reference)
+        resp = client.chat.completions.with_raw_response.create(
+            stream=False, **request_kwargs
+        )
+        print("status:", resp.http_response.status_code)
+        raw_text = resp.http_response.text
+        print("raw:", raw_text)
+
+        data = json.loads(raw_text)
+        print("assistant:", data["choices"][0]["message"].get("content"))
+        print("reference:", data["choices"][0]["message"].get("reference"))
 
     """
     req = await get_request_json()
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index fcea3f833..c0eeee3b3 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -65,8 +65,12 @@ Whether to receive the response as a stream. Set this to `false` explicitly if y
 
 #### Examples
 
+:::caution NOTE
+Streaming via `client.chat.completions.create(stream=True, ...)` does not return `reference`, because `reference` is only included in the raw response payload in non-stream mode. To obtain `reference`, set `stream=False` and read the raw response via `with_raw_response`, as shown below.
+:::
 ```python
 from openai import OpenAI
+import json
 
 model = "model"
 client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
@@ -74,7 +78,7 @@ client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/")
 
 stream = True
 reference = True
 
-completion = client.chat.completions.create(
+request_kwargs = dict(
     model=model,
     messages=[
         {"role": "system", "content": "You are a helpful assistant."},
@@ -82,26 +86,40 @@ completion = client.chat.completions.create(
         {"role": "assistant", "content": "I am an AI assistant named..."},
         {"role": "user", "content": "Can you tell me how to install neovim"},
     ],
-    stream=stream,
     extra_body={
         "reference": reference,
         "reference_metadata": {
             "include": True,
             "fields": ["author", "year", "source"],
         },
-    }
+    },
 )
 
 if stream:
+    completion = client.chat.completions.create(stream=True, **request_kwargs)
     for chunk in completion:
         print(chunk)
-        if reference and chunk.choices[0].finish_reason == "stop":
-            print(f"Reference:\n{chunk.choices[0].delta.reference}")
-            print(f"Final content:\n{chunk.choices[0].delta.final_content}")
 else:
-    print(completion.choices[0].message.content)
-    if reference:
-        print(completion.choices[0].message.reference)
+    resp = client.chat.completions.with_raw_response.create(
+        stream=False, **request_kwargs
+    )
+    print("status:", resp.http_response.status_code)
+    raw_text = resp.http_response.text
+    print("raw:", raw_text)
+
+    data = json.loads(raw_text)
+    print("assistant:", data["choices"][0]["message"].get("content"))
+    print("reference:", data["choices"][0]["message"].get("reference"))
 ```
 
 When `extra_body.reference_metadata.include` is `true`, each reference chunk may include a `document_metadata` object in both streaming and non-streaming responses.
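+
+A minimal sketch of reading that metadata from the non-stream `data` above, assuming the reference payload keeps RAGFlow's usual `chunks` list (verify the key names against your actual response):
+
+```python
+# Hypothetical follow-up: print each cited chunk's document_metadata.
+# "chunks" is an assumed key in the reference payload; adjust if your schema differs.
+reference_payload = data["choices"][0]["message"].get("reference") or {}
+for chunk in reference_payload.get("chunks", []):
+    print(chunk.get("document_metadata"))
+```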