feat: Add optional document metadata in OpenAI-compatible response references (#12950)

### What problem does this PR solve?

This PR adds an opt‑in way to include document‑level metadata in
OpenAI‑compatible reference chunks. Until now, metadata could be used
for filtering but wasn’t returned in responses. The change enables
clients to show richer citations (author/year/source, etc.) while
keeping payload size and privacy under control via an explicit request
flag and optional field allowlist.
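
For reviewers, a minimal request sketch of the new knob (the endpoint URL, API key, and model name are placeholders; only the `extra_body` shape and the `reference`/`document_metadata` fields come from this PR):

```python
from openai import OpenAI

# Placeholders: point an OpenAI-style client at the chat's OpenAI-compatible endpoint.
client = OpenAI(base_url="<ragflow_openai_compatible_base_url>", api_key="<ragflow_api_key>")

completion = client.chat.completions.create(
    model="model",  # placeholder; the chat assistant's configured model is used
    messages=[{"role": "user", "content": "Who authored the 2021 source report?"}],
    stream=False,
    extra_body={
        "reference": True,                           # return reference chunks at all
        "reference_metadata": {
            "include": True,                         # opt in to document metadata
            "fields": ["author", "year", "source"],  # optional allowlist
        },
    },
)

# `reference` is not part of the OpenAI schema, so read it from the dumped payload;
# each chunk may now carry a `document_metadata` dict limited to the requested fields.
message = completion.choices[0].message.model_dump()
for ref in message.get("reference") or []:
    print(ref.get("document_id"), ref.get("document_metadata"))
```

Omitting `fields` returns all metadata stored for a document; omitting `reference_metadata` (or leaving `include` false) keeps today's behavior unchanged.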

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Contribution during my time at RAGcon GmbH.
Levi
2026-02-05 02:54:33 +01:00
committed by GitHub
parent 2843570d8e
commit 803b480f9c
3 changed files with 97 additions and 5 deletions


@@ -192,6 +192,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
     - If `stream` is True, the final answer and reference information will appear in the **last chunk** of the stream.
     - If `stream` is False, the reference will be included in `choices[0].message.reference`.
+    - If `extra_body.reference_metadata.include` is True, each reference chunk may include `document_metadata` in both streaming and non-streaming responses.
     Example usage:
@@ -225,6 +226,10 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         stream=stream,
         extra_body={
             "reference": reference,
+            "reference_metadata": {
+                "include": True,
+                "fields": ["author", "year", "source"],
+            },
             "metadata_condition": {
                 "logic": "and",
                 "conditions": [
@@ -256,6 +261,13 @@ async def chat_completion_openai_like(tenant_id, chat_id):
         return get_error_data_result("extra_body must be an object.")
     need_reference = bool(extra_body.get("reference", False))
+    reference_metadata = extra_body.get("reference_metadata") or {}
+    if reference_metadata and not isinstance(reference_metadata, dict):
+        return get_error_data_result("reference_metadata must be an object.")
+    include_reference_metadata = bool(reference_metadata.get("include", False))
+    metadata_fields = reference_metadata.get("fields")
+    if metadata_fields is not None and not isinstance(metadata_fields, list):
+        return get_error_data_result("reference_metadata.fields must be an array.")
     messages = req.get("messages", [])
     # To prevent empty [] input
@@ -381,7 +393,11 @@ async def chat_completion_openai_like(tenant_id, chat_id):
             response["usage"] = {"prompt_tokens": prompt_tokens, "completion_tokens": token_used, "total_tokens": prompt_tokens + token_used}
             if need_reference:
                 reference_payload = final_reference if final_reference is not None else last_ans.get("reference", [])
-                response["choices"][0]["delta"]["reference"] = chunks_format(reference_payload)
+                response["choices"][0]["delta"]["reference"] = _build_reference_chunks(
+                    reference_payload,
+                    include_metadata=include_reference_metadata,
+                    metadata_fields=metadata_fields,
+                )
             response["choices"][0]["delta"]["final_content"] = final_answer if final_answer is not None else full_content
             yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n"
         yield "data:[DONE]\n\n"
@@ -431,7 +447,11 @@ async def chat_completion_openai_like(tenant_id, chat_id):
             ],
         }
         if need_reference:
-            response["choices"][0]["message"]["reference"] = chunks_format(answer.get("reference", {}))
+            response["choices"][0]["message"]["reference"] = _build_reference_chunks(
+                answer.get("reference", {}),
+                include_metadata=include_reference_metadata,
+                metadata_fields=metadata_fields,
+            )
         return jsonify(response)
@@ -1326,4 +1346,46 @@ async def tts(tenant_id):
     resp.headers.add_header("Connection", "keep-alive")
     resp.headers.add_header("X-Accel-Buffering", "no")
     return resp
+
+
+def _build_reference_chunks(reference, include_metadata=False, metadata_fields=None):
+    chunks = chunks_format(reference)
+    if not include_metadata:
+        return chunks
+
+    doc_ids_by_kb = {}
+    for chunk in chunks:
+        kb_id = chunk.get("dataset_id")
+        doc_id = chunk.get("document_id")
+        if not kb_id or not doc_id:
+            continue
+        doc_ids_by_kb.setdefault(kb_id, set()).add(doc_id)
+    if not doc_ids_by_kb:
+        return chunks
+
+    meta_by_doc = {}
+    for kb_id, doc_ids in doc_ids_by_kb.items():
+        meta_map = DocMetadataService.get_metadata_for_documents(list(doc_ids), kb_id)
+        if meta_map:
+            meta_by_doc.update(meta_map)
+
+    if metadata_fields is not None:
+        metadata_fields = {f for f in metadata_fields if isinstance(f, str)}
+        if not metadata_fields:
+            return chunks
+
+    for chunk in chunks:
+        doc_id = chunk.get("document_id")
+        if not doc_id:
+            continue
+        meta = meta_by_doc.get(doc_id)
+        if not meta:
+            continue
+        if metadata_fields is not None:
+            meta = {k: v for k, v in meta.items() if k in metadata_fields}
+        if meta:
+            chunk["document_metadata"] = meta
+    return chunks
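
For illustration, a hypothetical reference chunk as the helper above would emit it when `include` is true and `fields` is `["author", "year"]`. Apart from `dataset_id`, `document_id`, and `document_metadata`, which appear in the code, every key and value below is invented:

```python
# Hypothetical output chunk (values made up) for a request that sent
# reference_metadata = {"include": True, "fields": ["author", "year"]}.
chunk = {
    "dataset_id": "kb_7f3a",           # knowledge base the chunk came from
    "document_id": "doc_0042",         # document used to look up metadata
    "content": "…retrieved passage…",  # other chunk fields come from chunks_format()
    "document_metadata": {
        "author": "J. Doe",            # kept: listed in `fields`
        "year": 2021,                  # kept: listed in `fields`
        # "source" and any other metadata keys are dropped by the allowlist
    },
}
```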