mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Edit chunk shall update instead of insert it (#3709)
### What problem does this PR solve?

Editing a chunk shall update it instead of inserting it. Close #3679

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -47,7 +47,7 @@ def corpNorm(nm, add_region=True):
|
||||
nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE)
|
||||
if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm
|
||||
|
||||
tks = rag_tokenizer.tokenize(nm).split(" ")
|
||||
tks = rag_tokenizer.tokenize(nm).split()
|
||||
reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
|
||||
nm = ""
|
||||
for t in tks:
|
||||
|
||||
@ -44,7 +44,7 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv"))
|
||||
|
||||
def split(txt):
|
||||
tks = []
|
||||
for t in re.sub(r"[ \t]+", " ",txt).split(" "):
|
||||
for t in re.sub(r"[ \t]+", " ",txt).split():
|
||||
if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \
|
||||
re.match(r"[a-zA-Z]", t) and tks:
|
||||
tks[-1] = tks[-1] + " " + t
|
||||
|
||||
Reference in New Issue
Block a user