From 3d0b440e9fcdd8d182dc810eae2785518c5b1b21 Mon Sep 17 00:00:00 2001 From: Wesley <134300034+RyzeAngler@users.noreply.github.com> Date: Fri, 13 Jun 2025 14:56:25 +0800 Subject: [PATCH] fix(search.py): remove hard page_size (#8242) ### What problem does this PR solve? Fix the restriction of forcing similarity_threshold=0 and page_size=30 when doc_ids is not empty #8228 --------- Co-authored-by: shiqing.wusq Co-authored-by: Kevin Hu --- rag/nlp/search.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index c79bc2d27..855468c9e 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -380,15 +380,12 @@ class Dealer: rank_feature=rank_feature) # Already paginated in search function idx = np.argsort(sim * -1)[(page - 1) * page_size:page * page_size] - - dim = len(sres.query_vector) vector_column = f"q_{dim}_vec" zero_vector = [0.0] * dim - if doc_ids: - similarity_threshold = 0 - page_size = 30 sim_np = np.array(sim) + if doc_ids: + similarity_threshold = 0 filtered_count = (sim_np >= similarity_threshold).sum() ranks["total"] = int(filtered_count) # Convert from np.int64 to Python int otherwise JSON serializable error for i in idx: