Refactor: rename rmSpace to remove_redundant_spaces (#10796)

### What problem does this PR solve?

- rename rmSpace to remove_redundant_spaces
- move clean_markdown_block to common module
- add unit tests for remove_redundant_spaces and clean_markdown_block

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-10-28 09:46:32 +08:00
committed by GitHub
parent e59458c36b
commit 766d900a41
17 changed files with 604 additions and 75 deletions

View File

@ -23,10 +23,11 @@ from dataclasses import dataclass
from rag.prompts.generator import relevant_chunks_with_toc
from rag.settings import TAG_FLD, PAGERANK_FLD
from rag.utils import rmSpace, get_float
from rag.nlp import rag_tokenizer, query
import numpy as np
from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
from common.string_utils import remove_redundant_spaces
from common.float_utils import get_float
def index_name(uid): return f"ragflow_{uid}"
@ -342,7 +343,7 @@ class Dealer:
ins_tw.append(tks)
tksim = self.qryr.token_similarity(keywords, ins_tw)
vtsim, _ = rerank_mdl.similarity(query, [rmSpace(" ".join(tks)) for tks in ins_tw])
vtsim, _ = rerank_mdl.similarity(query, [remove_redundant_spaces(" ".join(tks)) for tks in ins_tw])
## For rank feature(tag_fea) scores.
rank_fea = self._rank_feature_scores(rank_feature, sres)
@ -442,7 +443,7 @@ class Dealer:
}
if highlight and sres.highlight:
if id in sres.highlight:
d["highlight"] = rmSpace(sres.highlight[id])
d["highlight"] = remove_redundant_spaces(sres.highlight[id])
else:
d["highlight"] = d["content_with_weight"]
ranks["chunks"].append(d)