Feat: support verifying the LLM key when setting it, and boost bigrams. (#12980)

#12863

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2026-02-05 19:19:09 +08:00
committed by GitHub
parent bbd8ba64a1
commit 1262533b74
5 changed files with 175 additions and 19 deletions

View File

@ -55,13 +55,11 @@ class FulltextQueryer(QueryBase):
keywords = [t for t in tks if t]
tks_w = self.tw.weights(tks, preprocess=False)
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w]
tks_w = [(re.sub(r"^[a-z0-9]$", "", tk), w) for tk, w in tks_w if tk]
tks_w = [(re.sub(r"^[\+-]", "", tk), w) for tk, w in tks_w if tk]
tks_w = [(tk.strip(), w) for tk, w in tks_w if tk.strip()]
syns = []
for tk, w in tks_w[:256]:
syn = self.syn.lookup(tk)
syn = rag_tokenizer.tokenize(" ".join(syn)).split()
syn = [rag_tokenizer.tokenize(s) for s in self.syn.lookup(tk)]
keywords.extend(syn)
syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn if s.strip()]
syns.append(" ".join(syn))
@ -190,7 +188,10 @@ class FulltextQueryer(QueryBase):
d = defaultdict(int)
wts = self.tw.weights(tks, preprocess=False)
for i, (t, c) in enumerate(wts):
d[t] += c
d[t] += c * 0.4
if i+1 < len(wts):
_t, _c = wts[i+1]
d[t+_t] += max(c, _c) * 0.6
return d
atks = to_dict(atks)