From fb77f9917b270ace4db282dafb8315c20a7a4294 Mon Sep 17 00:00:00 2001
From: Stephen Hu
Date: Mon, 18 Aug 2025 10:00:27 +0800
Subject: [PATCH] Refactor: Use Input Length In DefaultRerank (#9516)

### What problem does this PR solve?

1. Use the input length to preallocate `res` instead of growing it with `np.append`
2. Move the `torch_empty_cache()` call out of the batching loop so it runs once at the end

### Type of change

- [x] Refactoring
- [x] Performance Improvement
---
 rag/llm/rerank_model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py
index 67155ee8d..3dca735e4 100644
--- a/rag/llm/rerank_model.py
+++ b/rag/llm/rerank_model.py
@@ -100,7 +100,7 @@ class DefaultRerank(Base):
         old_dynamic_batch_size = self._dynamic_batch_size
         if max_batch_size is not None:
             self._dynamic_batch_size = max_batch_size
-        res = np.array([], dtype=float)
+        res = np.zeros(len(pairs), dtype=float)
         i = 0
         while i < len(pairs):
             cur_i = i
@@ -111,7 +111,7 @@ class DefaultRerank(Base):
                 try:
                     # call subclass implemented batch processing calculation
                     batch_scores = self._compute_batch_scores(pairs[i : i + current_batch])
-                    res = np.append(res, batch_scores)
+                    res[i : i + current_batch] = batch_scores
                     i += current_batch
                     self._dynamic_batch_size = min(self._dynamic_batch_size * 2, 8)
                     break
@@ -125,8 +125,8 @@ class DefaultRerank(Base):
                         raise
                     if retry_count >= max_retries:
                         raise RuntimeError("max retry times, still cannot process batch, please check your GPU memory")
-                self.torch_empty_cache()
-
+
+        self.torch_empty_cache()
         self._dynamic_batch_size = old_dynamic_batch_size
         return np.array(res)
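
For readers skimming the diff, here is a minimal standalone sketch of the preallocation pattern the patch switches to. It is not the repository code: `compute_batch_scores` is a stand-in for the subclass-implemented scorer, and the dynamic batch sizing and OOM-retry logic are omitted.

```python
import numpy as np

def compute_batch_scores(pairs):
    # Stand-in for DefaultRerank._compute_batch_scores: one score per (query, text) pair.
    return np.array([float(len(q) + len(t)) for q, t in pairs])

def score_all(pairs, batch_size=4):
    # Preallocate the result from the input length, then write each batch
    # into its slice instead of growing the array with np.append.
    res = np.zeros(len(pairs), dtype=float)
    i = 0
    while i < len(pairs):
        res[i : i + batch_size] = compute_batch_scores(pairs[i : i + batch_size])
        i += batch_size
    return res

pairs = [("query", f"candidate text {n}") for n in range(10)]
print(score_all(pairs))  # 10 scores, aligned with the input order
```

Writing each batch into a preallocated slice avoids the reallocation and copy that `np.append` performs on every batch, and it keeps each score at the index of its input pair even if a batch has to be retried.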