mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Enlarge the term weight difference (#3435)
### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
This commit is contained in:
@@ -66,7 +66,7 @@ class FulltextQueryer:
|
||||
|
||||
def question(self, txt, tbl="qa", min_match:float=0.6):
|
||||
txt = re.sub(
|
||||
r"[ :\r\n\t,,。??/`!!&\^%%()^]+",
|
||||
r"[ :\r\n\t,,。??/`!!&\^%%()^\[\]]+",
|
||||
" ",
|
||||
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
|
||||
).strip()
|
||||
|
||||
@@ -228,6 +228,7 @@ class Dealer:
|
||||
idf2 = np.array([idf(df(t), 1000000000) for t in tks])
|
||||
wts = (0.3 * idf1 + 0.7 * idf2) * \
|
||||
np.array([ner(t) * postag(t) for t in tks])
|
||||
wts = [math.exp(s) for s in wts]
|
||||
tw = list(zip(tks, wts))
|
||||
else:
|
||||
for tk in tks:
|
||||
@@ -236,6 +237,7 @@ class Dealer:
|
||||
idf2 = np.array([idf(df(t), 1000000000) for t in tt])
|
||||
wts = (0.3 * idf1 + 0.7 * idf2) * \
|
||||
np.array([ner(t) * postag(t) for t in tt])
|
||||
wts = [math.exp(s) for s in wts]
|
||||
tw.extend(zip(tt, wts))
|
||||
|
||||
S = np.sum([s for _, s in tw])
|
||||
|
||||
Reference in New Issue
Block a user