mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Editing a chunk shall update it instead of inserting it (#3709)
### What problem does this PR solve?

Editing a chunk shall update the chunk instead of inserting it. Closes #3679.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -74,7 +74,7 @@ class FulltextQueryer:
|
||||
|
||||
if not self.isChinese(txt):
|
||||
txt = FulltextQueryer.rmWWW(txt)
|
||||
tks = rag_tokenizer.tokenize(txt).split(" ")
|
||||
tks = rag_tokenizer.tokenize(txt).split()
|
||||
keywords = [t for t in tks if t]
|
||||
tks_w = self.tw.weights(tks, preprocess=False)
|
||||
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w]
|
||||
@ -83,7 +83,7 @@ class FulltextQueryer:
|
||||
syns = []
|
||||
for tk, w in tks_w:
|
||||
syn = self.syn.lookup(tk)
|
||||
syn = rag_tokenizer.tokenize(" ".join(syn)).split(" ")
|
||||
syn = rag_tokenizer.tokenize(" ".join(syn)).split()
|
||||
keywords.extend(syn)
|
||||
syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn]
|
||||
syns.append(" ".join(syn))
|
||||
@ -114,7 +114,7 @@ class FulltextQueryer:
|
||||
|
||||
txt = FulltextQueryer.rmWWW(txt)
|
||||
qs, keywords = [], []
|
||||
for tt in self.tw.split(txt)[:256]: # .split(" "):
|
||||
for tt in self.tw.split(txt)[:256]: # .split():
|
||||
if not tt:
|
||||
continue
|
||||
keywords.append(tt)
|
||||
@ -125,7 +125,7 @@ class FulltextQueryer:
|
||||
tms = []
|
||||
for tk, w in sorted(twts, key=lambda x: x[1] * -1):
|
||||
sm = (
|
||||
rag_tokenizer.fine_grained_tokenize(tk).split(" ")
|
||||
rag_tokenizer.fine_grained_tokenize(tk).split()
|
||||
if need_fine_grained_tokenize(tk)
|
||||
else []
|
||||
)
|
||||
@ -194,7 +194,7 @@ class FulltextQueryer:
|
||||
def toDict(tks):
|
||||
d = {}
|
||||
if isinstance(tks, str):
|
||||
tks = tks.split(" ")
|
||||
tks = tks.split()
|
||||
for t, c in self.tw.weights(tks, preprocess=False):
|
||||
if t not in d:
|
||||
d[t] = 0
|
||||
|
||||
Reference in New Issue
Block a user