Feat: chats completions API supports metadata filtering (#12023)

### What problem does this PR solve?

Chats completions API supports metadata filtering.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-12-19 11:36:35 +08:00
committed by GitHub
parent 2844700dc4
commit 6cd1824a77
2 changed files with 107 additions and 11 deletions

View File

@ -33,7 +33,7 @@ from api.db.services.dialog_service import DialogService, async_ask, async_chat,
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from common.metadata_utils import apply_meta_data_filter from common.metadata_utils import apply_meta_data_filter, convert_conditions, meta_filter
from api.db.services.search_service import SearchService from api.db.services.search_service import SearchService
from api.db.services.user_service import UserTenantService from api.db.services.user_service import UserTenantService
from common.misc_utils import get_uuid from common.misc_utils import get_uuid
@ -129,11 +129,33 @@ async def chat_completion(tenant_id, chat_id):
req = {"question": ""} req = {"question": ""}
if not req.get("session_id"): if not req.get("session_id"):
req["question"] = "" req["question"] = ""
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value): dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
if not dia:
return get_error_data_result(f"You don't own the chat {chat_id}") return get_error_data_result(f"You don't own the chat {chat_id}")
dia = dia[0]
if req.get("session_id"): if req.get("session_id"):
if not ConversationService.query(id=req["session_id"], dialog_id=chat_id): if not ConversationService.query(id=req["session_id"], dialog_id=chat_id):
return get_error_data_result(f"You don't own the session {req['session_id']}") return get_error_data_result(f"You don't own the session {req['session_id']}")
metadata_condition = req.get("metadata_condition") or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
if metadata_condition and req.get("question"):
metas = DocumentService.get_meta_by_kbs(dia.kb_ids or [])
filtered_doc_ids = meta_filter(
metas,
convert_conditions(metadata_condition),
metadata_condition.get("logic", "and"),
)
if metadata_condition.get("conditions") and not filtered_doc_ids:
filtered_doc_ids = ["-999"]
if filtered_doc_ids:
req["doc_ids"] = ",".join(filtered_doc_ids)
else:
req.pop("doc_ids", None)
if req.get("stream", True): if req.get("stream", True):
resp = Response(rag_completion(tenant_id, chat_id, **req), mimetype="text/event-stream") resp = Response(rag_completion(tenant_id, chat_id, **req), mimetype="text/event-stream")
resp.headers.add_header("Cache-control", "no-cache") resp.headers.add_header("Cache-control", "no-cache")
@ -196,7 +218,19 @@ async def chat_completion_openai_like(tenant_id, chat_id):
{"role": "user", "content": "Can you tell me how to install neovim"}, {"role": "user", "content": "Can you tell me how to install neovim"},
], ],
stream=stream, stream=stream,
extra_body={"reference": reference} extra_body={
"reference": reference,
"metadata_condition": {
"logic": "and",
"conditions": [
{
"name": "author",
"comparison_operator": "is",
"value": "bob"
}
]
}
}
) )
if stream: if stream:
@ -212,7 +246,11 @@ async def chat_completion_openai_like(tenant_id, chat_id):
""" """
req = await get_request_json() req = await get_request_json()
need_reference = bool(req.get("reference", False)) extra_body = req.get("extra_body") or {}
if extra_body and not isinstance(extra_body, dict):
return get_error_data_result("extra_body must be an object.")
need_reference = bool(extra_body.get("reference", False))
messages = req.get("messages", []) messages = req.get("messages", [])
# To prevent empty [] input # To prevent empty [] input
@ -230,6 +268,22 @@ async def chat_completion_openai_like(tenant_id, chat_id):
return get_error_data_result(f"You don't own the chat {chat_id}") return get_error_data_result(f"You don't own the chat {chat_id}")
dia = dia[0] dia = dia[0]
metadata_condition = extra_body.get("metadata_condition") or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
doc_ids_str = None
if metadata_condition:
metas = DocumentService.get_meta_by_kbs(dia.kb_ids or [])
filtered_doc_ids = meta_filter(
metas,
convert_conditions(metadata_condition),
metadata_condition.get("logic", "and"),
)
if metadata_condition.get("conditions") and not filtered_doc_ids:
filtered_doc_ids = ["-999"]
doc_ids_str = ",".join(filtered_doc_ids) if filtered_doc_ids else None
# Filter system and non-sense assistant messages # Filter system and non-sense assistant messages
msg = [] msg = []
for m in messages: for m in messages:
@ -277,7 +331,10 @@ async def chat_completion_openai_like(tenant_id, chat_id):
} }
try: try:
async for ans in async_chat(dia, msg, True, toolcall_session=toolcall_session, tools=tools, quote=need_reference): chat_kwargs = {"toolcall_session": toolcall_session, "tools": tools, "quote": need_reference}
if doc_ids_str:
chat_kwargs["doc_ids"] = doc_ids_str
async for ans in async_chat(dia, msg, True, **chat_kwargs):
last_ans = ans last_ans = ans
answer = ans["answer"] answer = ans["answer"]
@ -329,8 +386,7 @@ async def chat_completion_openai_like(tenant_id, chat_id):
response["choices"][0]["delta"]["content"] = None response["choices"][0]["delta"]["content"] = None
response["choices"][0]["delta"]["reasoning_content"] = None response["choices"][0]["delta"]["reasoning_content"] = None
response["choices"][0]["finish_reason"] = "stop" response["choices"][0]["finish_reason"] = "stop"
response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used, response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used, "total_tokens": len(prompt) + token_used}
"total_tokens": len(prompt) + token_used}
if need_reference: if need_reference:
response["choices"][0]["delta"]["reference"] = chunks_format(last_ans.get("reference", [])) response["choices"][0]["delta"]["reference"] = chunks_format(last_ans.get("reference", []))
response["choices"][0]["delta"]["final_content"] = last_ans.get("answer", "") response["choices"][0]["delta"]["final_content"] = last_ans.get("answer", "")
@ -345,7 +401,10 @@ async def chat_completion_openai_like(tenant_id, chat_id):
return resp return resp
else: else:
answer = None answer = None
async for ans in async_chat(dia, msg, False, toolcall_session=toolcall_session, tools=tools, quote=need_reference): chat_kwargs = {"toolcall_session": toolcall_session, "tools": tools, "quote": need_reference}
if doc_ids_str:
chat_kwargs["doc_ids"] = doc_ids_str
async for ans in async_chat(dia, msg, False, **chat_kwargs):
# focus answer content only # focus answer content only
answer = ans answer = ans
break break

View File

@ -48,6 +48,7 @@ This API follows the same request and response format as OpenAI's API. It allows
- `"model"`: `string` - `"model"`: `string`
- `"messages"`: `object list` - `"messages"`: `object list`
- `"stream"`: `boolean` - `"stream"`: `boolean`
- `"extra_body"`: `object` (optional)
##### Request example ##### Request example
@ -59,7 +60,20 @@ curl --request POST \
--data '{ --data '{
"model": "model", "model": "model",
"messages": [{"role": "user", "content": "Say this is a test!"}], "messages": [{"role": "user", "content": "Say this is a test!"}],
"stream": true "stream": true,
"extra_body": {
"reference": true,
"metadata_condition": {
"logic": "and",
"conditions": [
{
"name": "author",
"comparison_operator": "is",
"value": "bob"
}
]
}
}
}' }'
``` ```
@ -74,6 +88,11 @@ curl --request POST \
- `stream` (*Body parameter*) `boolean` - `stream` (*Body parameter*) `boolean`
Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream. Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
- `extra_body` (*Body parameter*) `object`
Extra request parameters:
- `reference`: `boolean` - include reference in the final chunk (stream) or in the final message (non-stream).
- `metadata_condition`: `object` - metadata filter conditions applied to retrieval results.
#### Response #### Response
Stream: Stream:
@ -3185,6 +3204,7 @@ Asks a specified chat assistant a question to start an AI-powered conversation.
- `"stream"`: `boolean` - `"stream"`: `boolean`
- `"session_id"`: `string` (optional) - `"session_id"`: `string` (optional)
- `"user_id"`: `string` (optional) - `"user_id"`: `string` (optional)
- `"metadata_condition"`: `object` (optional)
##### Request example ##### Request example
@ -3207,7 +3227,17 @@ curl --request POST \
{ {
"question": "Who are you", "question": "Who are you",
"stream": true, "stream": true,
"session_id":"9fa7691cb85c11ef9c5f0242ac120005" "session_id":"9fa7691cb85c11ef9c5f0242ac120005",
"metadata_condition": {
"logic": "and",
"conditions": [
{
"name": "author",
"comparison_operator": "is",
"value": "bob"
}
]
}
}' }'
``` ```
@ -3225,6 +3255,13 @@ curl --request POST \
The ID of session. If it is not provided, a new session will be generated. The ID of session. If it is not provided, a new session will be generated.
- `"user_id"`: (*Body parameter*), `string` - `"user_id"`: (*Body parameter*), `string`
The optional user-defined ID. Valid *only* when no `session_id` is provided. The optional user-defined ID. Valid *only* when no `session_id` is provided.
- `"metadata_condition"`: (*Body parameter*), `object`
Optional metadata filter conditions applied to retrieval results.
- `logic`: `string`, one of `and` / `or`
- `conditions`: `list[object]` where each condition contains:
- `name`: `string` metadata key
- `comparison_operator`: `string` (e.g. `is`, `not is`, `contains`, `not contains`, `start with`, `end with`, `empty`, `not empty`, `>`, `<`, `>=`, `<=`)
- `value`: `string|number|boolean` (optional for `empty`/`not empty`)
#### Response #### Response