From 3d0b440e9fcdd8d182dc810eae2785518c5b1b21 Mon Sep 17 00:00:00 2001 From: Wesley <134300034+RyzeAngler@users.noreply.github.com> Date: Fri, 13 Jun 2025 14:56:25 +0800 Subject: [PATCH] fix(search.py): remove hard page_size (#8242) ### What problem does this PR solve? Fix the restriction of forcing similarity_threshold=0 and page_size=30 when doc_ids is not empty #8228 --------- Co-authored-by: shiqing.wusq Co-authored-by: Kevin Hu --- rag/nlp/search.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index c79bc2d27..855468c9e 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -380,15 +380,12 @@ class Dealer: rank_feature=rank_feature) # Already paginated in search function idx = np.argsort(sim * -1)[(page - 1) * page_size:page * page_size] - - dim = len(sres.query_vector) vector_column = f"q_{dim}_vec" zero_vector = [0.0] * dim - if doc_ids: - similarity_threshold = 0 - page_size = 30 sim_np = np.array(sim) + if doc_ids: + similarity_threshold = 0 filtered_count = (sim_np >= similarity_threshold).sum() ranks["total"] = int(filtered_count) # Convert from np.int64 to Python int otherwise JSON serializable error for i in idx: