feat: Add optional document metadata in OpenAI-compatible response references (#12950)

### What problem does this PR solve?

This PR adds an opt‑in way to include document‑level metadata in
OpenAI‑compatible reference chunks. Until now, metadata could be used
for filtering but wasn’t returned in responses. The change enables
clients to show richer citations (author/year/source, etc.) while
keeping payload size and privacy under control via an explicit request
flag and optional field allowlist.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Contribution during my time at RAGcon GmbH.
This commit is contained in:
Levi
2026-02-05 02:54:33 +01:00
committed by GitHub
parent 2843570d8e
commit 803b480f9c
3 changed files with 97 additions and 5 deletions

View File

@ -192,6 +192,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
- If `stream` is True, the final answer and reference information will appear in the **last chunk** of the stream. - If `stream` is True, the final answer and reference information will appear in the **last chunk** of the stream.
- If `stream` is False, the reference will be included in `choices[0].message.reference`. - If `stream` is False, the reference will be included in `choices[0].message.reference`.
- If `extra_body.reference_metadata.include` is True, each reference chunk may include `document_metadata` in both streaming and non-streaming responses.
Example usage: Example usage:
@ -225,6 +226,10 @@ async def chat_completion_openai_like(tenant_id, chat_id):
stream=stream, stream=stream,
extra_body={ extra_body={
"reference": reference, "reference": reference,
"reference_metadata": {
"include": True,
"fields": ["author", "year", "source"],
},
"metadata_condition": { "metadata_condition": {
"logic": "and", "logic": "and",
"conditions": [ "conditions": [
@ -256,6 +261,13 @@ async def chat_completion_openai_like(tenant_id, chat_id):
return get_error_data_result("extra_body must be an object.") return get_error_data_result("extra_body must be an object.")
need_reference = bool(extra_body.get("reference", False)) need_reference = bool(extra_body.get("reference", False))
reference_metadata = extra_body.get("reference_metadata") or {}
if reference_metadata and not isinstance(reference_metadata, dict):
return get_error_data_result("reference_metadata must be an object.")
include_reference_metadata = bool(reference_metadata.get("include", False))
metadata_fields = reference_metadata.get("fields")
if metadata_fields is not None and not isinstance(metadata_fields, list):
return get_error_data_result("reference_metadata.fields must be an array.")
messages = req.get("messages", []) messages = req.get("messages", [])
# To prevent empty [] input # To prevent empty [] input
@ -381,7 +393,11 @@ async def chat_completion_openai_like(tenant_id, chat_id):
response["usage"] = {"prompt_tokens": prompt_tokens, "completion_tokens": token_used, "total_tokens": prompt_tokens + token_used} response["usage"] = {"prompt_tokens": prompt_tokens, "completion_tokens": token_used, "total_tokens": prompt_tokens + token_used}
if need_reference: if need_reference:
reference_payload = final_reference if final_reference is not None else last_ans.get("reference", []) reference_payload = final_reference if final_reference is not None else last_ans.get("reference", [])
response["choices"][0]["delta"]["reference"] = chunks_format(reference_payload) response["choices"][0]["delta"]["reference"] = _build_reference_chunks(
reference_payload,
include_metadata=include_reference_metadata,
metadata_fields=metadata_fields,
)
response["choices"][0]["delta"]["final_content"] = final_answer if final_answer is not None else full_content response["choices"][0]["delta"]["final_content"] = final_answer if final_answer is not None else full_content
yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n" yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n"
yield "data:[DONE]\n\n" yield "data:[DONE]\n\n"
@ -431,7 +447,11 @@ async def chat_completion_openai_like(tenant_id, chat_id):
], ],
} }
if need_reference: if need_reference:
response["choices"][0]["message"]["reference"] = chunks_format(answer.get("reference", {})) response["choices"][0]["message"]["reference"] = _build_reference_chunks(
answer.get("reference", {}),
include_metadata=include_reference_metadata,
metadata_fields=metadata_fields,
)
return jsonify(response) return jsonify(response)
@ -1327,3 +1347,45 @@ async def tts(tenant_id):
resp.headers.add_header("X-Accel-Buffering", "no") resp.headers.add_header("X-Accel-Buffering", "no")
return resp return resp
def _build_reference_chunks(reference, include_metadata=False, metadata_fields=None):
    """Format reference chunks, optionally attaching document-level metadata.

    Args:
        reference: Raw reference payload accepted by ``chunks_format``.
        include_metadata: When True, enrich each chunk with a
            ``document_metadata`` dict for its source document (if any).
        metadata_fields: Optional allowlist of metadata keys; non-string
            entries are ignored. ``None`` means include every key; an
            effectively empty allowlist means include none.

    Returns:
        The chunk list produced by ``chunks_format``, with
        ``document_metadata`` added in place where applicable.
    """
    chunks = chunks_format(reference)
    if not include_metadata:
        return chunks

    # Normalize the allowlist up front so an effectively-empty allowlist
    # (e.g. [] or only non-string entries) short-circuits BEFORE any
    # metadata-service calls — previously the per-dataset lookups ran
    # first and their results were then discarded.
    if metadata_fields is not None:
        metadata_fields = {f for f in metadata_fields if isinstance(f, str)}
        if not metadata_fields:
            return chunks

    # Group document ids by dataset so metadata is fetched once per dataset.
    doc_ids_by_kb = {}
    for chunk in chunks:
        kb_id = chunk.get("dataset_id")
        doc_id = chunk.get("document_id")
        if not kb_id or not doc_id:
            continue
        doc_ids_by_kb.setdefault(kb_id, set()).add(doc_id)
    if not doc_ids_by_kb:
        return chunks

    meta_by_doc = {}
    for kb_id, doc_ids in doc_ids_by_kb.items():
        meta_map = DocMetadataService.get_metadata_for_documents(list(doc_ids), kb_id)
        if meta_map:
            meta_by_doc.update(meta_map)

    # Attach (possibly filtered) metadata; chunks without metadata, or whose
    # metadata is fully filtered out, are left untouched.
    for chunk in chunks:
        doc_id = chunk.get("document_id")
        if not doc_id:
            continue
        meta = meta_by_doc.get(doc_id)
        if not meta:
            continue
        if metadata_fields is not None:
            meta = {k: v for k, v in meta.items() if k in metadata_fields}
        if meta:
            chunk["document_metadata"] = meta
    return chunks

View File

@ -65,6 +65,10 @@ curl --request POST \
"stream": true, "stream": true,
"extra_body": { "extra_body": {
"reference": true, "reference": true,
"reference_metadata": {
"include": true,
"fields": ["author", "year", "source"]
},
"metadata_condition": { "metadata_condition": {
"logic": "and", "logic": "and",
"conditions": [ "conditions": [
@ -93,6 +97,9 @@ curl --request POST \
- `extra_body` (*Body parameter*) `object` - `extra_body` (*Body parameter*) `object`
Extra request parameters: Extra request parameters:
- `reference`: `boolean` - include reference in the final chunk (stream) or in the final message (non-stream). - `reference`: `boolean` - include reference in the final chunk (stream) or in the final message (non-stream).
- `reference_metadata`: `object` - include document metadata in each reference chunk.
- `include`: `boolean` - enable document metadata in reference chunks.
- `fields`: `list[string]` - optional allowlist of metadata keys. Omit to include all. Use an empty list to include none.
- `metadata_condition`: `object` - metadata filter conditions applied to retrieval results. - `metadata_condition`: `object` - metadata filter conditions applied to retrieval results.
#### Response #### Response
@ -275,6 +282,11 @@ data: {
"content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.", "content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.",
"document_id": "4bdd2ff65e1511f0907f09f583941b45", "document_id": "4bdd2ff65e1511f0907f09f583941b45",
"document_name": "INSTALL22.md", "document_name": "INSTALL22.md",
"document_metadata": {
"author": "bob",
"year": "2023",
"source": "internal"
},
"dataset_id": "456ce60c5e1511f0907f09f583941b45", "dataset_id": "456ce60c5e1511f0907f09f583941b45",
"image_id": "", "image_id": "",
"positions": [ "positions": [
@ -345,6 +357,11 @@ Non-stream:
"doc_type": "", "doc_type": "",
"document_id": "4bdd2ff65e1511f0907f09f583941b45", "document_id": "4bdd2ff65e1511f0907f09f583941b45",
"document_name": "INSTALL22.md", "document_name": "INSTALL22.md",
"document_metadata": {
"author": "bob",
"year": "2023",
"source": "internal"
},
"id": "4b8935ac0a22deb1", "id": "4b8935ac0a22deb1",
"image_id": "", "image_id": "",
"positions": [ "positions": [
@ -3948,6 +3965,8 @@ data: {
data:[DONE] data:[DONE]
``` ```
When `extra_body.reference_metadata.include` is `true`, each reference chunk may include a `document_metadata` object.
Non-stream: Non-stream:
```json ```json

View File

@ -83,7 +83,13 @@ completion = client.chat.completions.create(
{"role": "user", "content": "Can you tell me how to install neovim"}, {"role": "user", "content": "Can you tell me how to install neovim"},
], ],
stream=stream, stream=stream,
extra_body={"reference": reference} extra_body={
"reference": reference,
"reference_metadata": {
"include": True,
"fields": ["author", "year", "source"],
},
}
) )
if stream: if stream:
@ -98,6 +104,8 @@ else:
print(completion.choices[0].message.reference) print(completion.choices[0].message.reference)
``` ```
When `extra_body.reference_metadata.include` is `true`, each reference chunk may include a `document_metadata` object in both streaming and non-streaming responses.
## DATASET MANAGEMENT ## DATASET MANAGEMENT
--- ---
@ -1518,6 +1526,8 @@ A list of `Chunk` objects representing references to the message, each containin
The ID of the referenced document. The ID of the referenced document.
- `document_name` `str` - `document_name` `str`
The name of the referenced document. The name of the referenced document.
- `document_metadata` `dict`
Optional document metadata, returned only when `extra_body.reference_metadata.include` is `true`.
- `position` `list[str]` - `position` `list[str]`
The location information of the chunk within the referenced document. The location information of the chunk within the referenced document.
- `dataset_id` `str` - `dataset_id` `str`
@ -1643,6 +1653,8 @@ A list of `Chunk` objects representing references to the message, each containin
The ID of the referenced document. The ID of the referenced document.
- `document_name` `str` - `document_name` `str`
The name of the referenced document. The name of the referenced document.
- `document_metadata` `dict`
Optional document metadata, returned only when `extra_body.reference_metadata.include` is `true`.
- `position` `list[str]` - `position` `list[str]`
The location information of the chunk within the referenced document. The location information of the chunk within the referenced document.
- `dataset_id` `str` - `dataset_id` `str`
@ -2596,4 +2608,3 @@ memory_object.get_message_content(message_id)
``` ```
--- ---