Enlarge the term weight difference (#3435)

### What problem does this PR solve?


### Type of change

- [x] Performance Improvement
This commit is contained in:
Kevin Hu
2024-11-15 15:41:50 +08:00
committed by GitHub
parent 6d451dbe06
commit ca9e97d2f2
3 changed files with 15 additions and 2 deletions

View File

@@ -66,7 +66,7 @@ class FulltextQueryer:
def question(self, txt, tbl="qa", min_match:float=0.6):
txt = re.sub(
-r"[ :\r\n\t,,。??/`!&\^%%()^]+",
+r"[ :\r\n\t,,。??/`!&\^%%()^\[\]]+",
" ",
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
).strip()

View File

@@ -228,6 +228,7 @@ class Dealer:
idf2 = np.array([idf(df(t), 1000000000) for t in tks])
wts = (0.3 * idf1 + 0.7 * idf2) * \
np.array([ner(t) * postag(t) for t in tks])
+wts = [math.exp(s) for s in wts]
tw = list(zip(tks, wts))
else:
for tk in tks:
@@ -236,6 +237,7 @@ class Dealer:
idf2 = np.array([idf(df(t), 1000000000) for t in tt])
wts = (0.3 * idf1 + 0.7 * idf2) * \
np.array([ner(t) * postag(t) for t in tt])
+wts = [math.exp(s) for s in wts]
tw.extend(zip(tt, wts))
S = np.sum([s for _, s in tw])