mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Refactor: rename rmSpace to remove_redundant_spaces (#10796)
### What problem does this PR solve?

- rename rmSpace to remove_redundant_spaces
- move clean_markdown_block to common module
- add unit tests for remove_redundant_spaces and clean_markdown_block

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@@ -25,7 +25,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from rag.nlp import rag_tokenizer
|
||||
from deepdoc.parser.resume import refactor
|
||||
from deepdoc.parser.resume import step_one, step_two
|
||||
from rag.utils import rmSpace
|
||||
from common.string_utils import remove_redundant_spaces
|
||||
|
||||
forbidden_select_fields4resume = [
|
||||
"name_pinyin_kwd", "edu_first_fea_kwd", "degree_kwd", "sch_rank_kwd", "edu_fea_kwd"
|
||||
@@ -130,7 +130,7 @@ def chunk(filename, binary=None, callback=None, **kwargs):
|
||||
if isinstance(v, list):
|
||||
v = v[0]
|
||||
if n.find("tks") > 0:
|
||||
v = rmSpace(v)
|
||||
v = remove_redundant_spaces(v)
|
||||
titles.append(str(v))
|
||||
doc = {
|
||||
"docnm_kwd": filename,
|
||||
@@ -145,7 +145,7 @@ def chunk(filename, binary=None, callback=None, **kwargs):
|
||||
if isinstance(v, list):
|
||||
v = " ".join(v)
|
||||
if n.find("tks") > 0:
|
||||
v = rmSpace(v)
|
||||
v = remove_redundant_spaces(v)
|
||||
pairs.append((m, str(v)))
|
||||
|
||||
doc["content_with_weight"] = "\n".join(
|
||||
|
||||
Reference in New Issue
Block a user