Use Infinity single-field-multi-index (#11444)

### What problem does this PR solve?

Use Infinity single-field-multi-index

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
Zhichang Yu
2025-11-26 11:06:37 +08:00
committed by GitHub
parent a28c672695
commit 40e84ca41a
22 changed files with 577 additions and 140 deletions

View File

@ -17,7 +17,6 @@ import json
import logging
import re
import math
import os
from collections import OrderedDict
from dataclasses import dataclass
@ -28,6 +27,7 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionE
from common.string_utils import remove_redundant_spaces
from common.float_utils import get_float
from common.constants import PAGERANK_FLD, TAG_FLD
from common import settings
def index_name(uid): return f"ragflow_{uid}"
@ -120,7 +120,8 @@ class Dealer:
else:
matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1))
q_vec = matchDense.embedding_data
src.append(f"q_{len(q_vec)}_vec")
if not settings.DOC_ENGINE_INFINITY:
src.append(f"q_{len(q_vec)}_vec")
fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"})
matchExprs = [matchText, matchDense, fusionExpr]
@ -405,8 +406,13 @@ class Dealer:
rank_feature=rank_feature,
)
else:
lower_case_doc_engine = os.getenv("DOC_ENGINE", "elasticsearch")
if lower_case_doc_engine in ["elasticsearch", "opensearch"]:
if settings.DOC_ENGINE_INFINITY:
# Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
else:
# ElasticSearch doesn't normalize each way score before fusion.
sim, tsim, vsim = self.rerank(
sres,
@ -415,12 +421,6 @@ class Dealer:
vector_similarity_weight,
rank_feature=rank_feature,
)
else:
# Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
sim_np = np.array(sim, dtype=np.float64)
if sim_np.size == 0: