From b36d9744ae44d3843107a16438658003bd64a060 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathias=20Panzenb=C3=B6ck?=
 <134175+panzi@users.noreply.github.com>
Date: Tue, 27 Jan 2026 05:45:58 +0100
Subject: [PATCH] shortcut metadata_condition if there is none (#12835)

### What problem does this PR solve?

If no `metadata_condition` parameter is given then don't load the
metadata of all documents into memory. Instead just pass `doc_ids` as
`None` to the `retrieval()` method, which means to use all documents of
the given datasets.

This is relevant if you have *a lot* of documents!

### Type of change

- [x] Performance Improvement
---
 api/apps/sdk/doc.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index 2e97c1668..d073f334f 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -1526,14 +1526,18 @@ async def retrieval_test(tenant_id):
             if doc_id not in doc_ids_list:
                 return get_error_data_result(f"The datasets don't own the document {doc_id}")
     if not doc_ids:
-        metadata_condition = req.get("metadata_condition", {}) or {}
-        metas = DocumentService.get_meta_by_kbs(kb_ids)
-        doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
-        # If metadata_condition has conditions but no docs match, return empty result
-        if not doc_ids and metadata_condition.get("conditions"):
-            return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
-        if metadata_condition and not doc_ids:
-            doc_ids = ["-999"]
+        metadata_condition = req.get("metadata_condition")
+        if metadata_condition:
+            metas = DocumentService.get_meta_by_kbs(kb_ids)
+            doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
+            # If metadata_condition has conditions but no docs match, return empty result
+            if not doc_ids and metadata_condition.get("conditions"):
+                return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
+            if metadata_condition and not doc_ids:
+                doc_ids = ["-999"]
+        else:
+            # If doc_ids is None all documents of the datasets are used
+            doc_ids = None
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))