mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-09 12:25:06 +08:00
Refactor: rename rmSpace to remove_redundant_spaces (#10796)
### What problem does this PR solve?

- rename rmSpace to remove_redundant_spaces
- move clean_markdown_block to common module
- add unit tests for remove_redundant_spaces and clean_markdown_block

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -35,7 +35,7 @@ from rag.app.tag import label_question
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
from rag.prompts.generator import gen_meta_filter, cross_languages, keyword_extraction
|
||||
from rag.settings import PAGERANK_FLD
|
||||
from rag.utils import rmSpace
|
||||
from common.string_utils import remove_redundant_spaces
|
||||
|
||||
|
||||
@manager.route('/list', methods=['POST']) # noqa: F821
|
||||
@ -65,7 +65,7 @@ def list_chunk():
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
"chunk_id": id,
|
||||
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
||||
"content_with_weight": remove_redundant_spaces(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
||||
id].get(
|
||||
"content_with_weight", ""),
|
||||
"doc_id": sres.field[id]["doc_id"],
|
||||
|
||||
@ -41,8 +41,8 @@ from rag.app.qa import beAdoc, rmPrefix
|
||||
from rag.app.tag import label_question
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
from rag.prompts.generator import cross_languages, keyword_extraction
|
||||
from rag.utils import rmSpace
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
from common.string_utils import remove_redundant_spaces
|
||||
|
||||
MAXIMUM_OF_UPLOADING_FILES = 256
|
||||
|
||||
@ -1000,7 +1000,7 @@ def list_chunks(tenant_id, dataset_id, document_id):
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
"id": id,
|
||||
"content": (rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[id].get("content_with_weight", "")),
|
||||
"content": (remove_redundant_spaces(sres.highlight[id]) if question and id in sres.highlight else sres.field[id].get("content_with_weight", "")),
|
||||
"document_id": sres.field[id]["doc_id"],
|
||||
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
||||
"important_keywords": sres.field[id].get("important_kwd", []),
|
||||
|
||||
@ -41,8 +41,9 @@ from rag.app.tag import label_question
|
||||
from rag.nlp.search import index_name
|
||||
from rag.prompts.generator import chunks_format, citation_prompt, cross_languages, full_question, kb_prompt, keyword_extraction, message_fit_in, \
|
||||
gen_meta_filter, PROMPT_JINJA_ENV, ASK_SUMMARY
|
||||
from rag.utils import num_tokens_from_string, rmSpace
|
||||
from rag.utils import num_tokens_from_string
|
||||
from rag.utils.tavily_conn import Tavily
|
||||
from common.string_utils import remove_redundant_spaces
|
||||
|
||||
|
||||
class DialogService(CommonService):
|
||||
@ -706,7 +707,7 @@ Please write the SQL, only SQL, without any other explanations or text.
|
||||
|
||||
line = "|" + "|".join(["------" for _ in range(len(column_idx))]) + ("|------|" if docid_idx and docid_idx else "")
|
||||
|
||||
rows = ["|" + "|".join([rmSpace(str(r[i])) for i in column_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
|
||||
rows = ["|" + "|".join([remove_redundant_spaces(str(r[i])) for i in column_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
|
||||
rows = [r for r in rows if re.sub(r"[ |]+", "", r)]
|
||||
if quota:
|
||||
rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)])
|
||||
|
||||
Reference in New Issue
Block a user