diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py
index 88fda1478..4c21fd270 100644
--- a/rag/llm/rerank_model.py
+++ b/rag/llm/rerank_model.py
@@ -101,9 +101,10 @@ class DefaultRerank(Base):
         old_dynamic_batch_size = self._dynamic_batch_size
         if max_batch_size is not None:
             self._dynamic_batch_size = max_batch_size
-        res = []
+        res = np.array([], dtype=float)
         i = 0
         while i < len(pairs):
+            cur_i = i
             current_batch = self._dynamic_batch_size
             max_retries = 5
             retry_count = 0
@@ -111,7 +112,7 @@ class DefaultRerank(Base):
                 try:
                     # call subclass implemented batch processing calculation
                     batch_scores = self._compute_batch_scores(pairs[i : i + current_batch])
-                    res.extend(batch_scores)
+                    res = np.append(res, batch_scores)
                     i += current_batch
                     self._dynamic_batch_size = min(self._dynamic_batch_size * 2, 8)
                     break
@@ -119,6 +120,7 @@ class DefaultRerank(Base):
                     if "CUDA out of memory" in str(e) and current_batch > self._min_batch_size:
                         current_batch = max(current_batch // 2, self._min_batch_size)
                         self.torch_empty_cache()
+                        i = cur_i  # reset i to the start of the current batch
                         retry_count += 1
                     else:
                         raise
@@ -134,7 +136,7 @@ class DefaultRerank(Base):
             scores = self._model.compute_score(batch_pairs)
         else:
             scores = self._model.compute_score(batch_pairs, max_length=max_length)
-        scores = sigmoid(np.array(scores)).tolist()
+        scores = sigmoid(np.array(scores))
         if not isinstance(scores, Iterable):
             scores = [scores]
         return scores
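
For context, below is a minimal standalone sketch of the retry pattern this patch touches: scores accumulate into a NumPy array while the batch size is halved on CUDA out-of-memory errors and the index is reset to the start of the failed batch. The names here are assumptions for illustration only: a hypothetical compute_scores(pairs) callable stands in for self._compute_batch_scores, and torch.cuda.empty_cache() stands in for self.torch_empty_cache(); this is not the DefaultRerank implementation itself.

import numpy as np
import torch


def score_in_batches(pairs, compute_scores, start_batch=4, min_batch=1, max_retries=5):
    """Score `pairs` in batches, shrinking the batch size when CUDA runs out of memory."""
    res = np.array([], dtype=float)
    batch_size = start_batch
    i = 0
    while i < len(pairs):
        cur_i = i                          # remember where this batch started
        current_batch = batch_size
        retry_count = 0
        while retry_count < max_retries:
            try:
                batch_scores = compute_scores(pairs[i : i + current_batch])
                res = np.append(res, batch_scores)
                i += current_batch
                batch_size = min(batch_size * 2, 8)   # grow the batch again after a success
                break
            except RuntimeError as e:
                if "CUDA out of memory" in str(e) and current_batch > min_batch:
                    current_batch = max(current_batch // 2, min_batch)
                    torch.cuda.empty_cache()
                    i = cur_i              # retry from the start of the failed batch
                    retry_count += 1
                else:
                    raise
        else:
            # sketch-only guard, not in the patch: fail instead of looping forever
            raise RuntimeError("batch scoring failed after %d retries" % max_retries)
    return res

Called as, e.g., score_in_batches(pairs, lambda p: model.compute_score(p)), the sketch returns a flat NumPy array of scores, mirroring the patch's switch from list.extend to np.append accumulation.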