mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-06 10:35:06 +08:00
Feat: support verify to set llm key and boost bigrams. (#12980)
#12863

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -55,13 +55,11 @@ class FulltextQueryer(QueryBase):
|
||||
keywords = [t for t in tks if t]
|
||||
tks_w = self.tw.weights(tks, preprocess=False)
|
||||
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w]
|
||||
tks_w = [(re.sub(r"^[a-z0-9]$", "", tk), w) for tk, w in tks_w if tk]
|
||||
tks_w = [(re.sub(r"^[\+-]", "", tk), w) for tk, w in tks_w if tk]
|
||||
tks_w = [(tk.strip(), w) for tk, w in tks_w if tk.strip()]
|
||||
syns = []
|
||||
for tk, w in tks_w[:256]:
|
||||
syn = self.syn.lookup(tk)
|
||||
syn = rag_tokenizer.tokenize(" ".join(syn)).split()
|
||||
syn = [rag_tokenizer.tokenize(s) for s in self.syn.lookup(tk)]
|
||||
keywords.extend(syn)
|
||||
syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn if s.strip()]
|
||||
syns.append(" ".join(syn))
|
||||
@ -190,7 +188,10 @@ class FulltextQueryer(QueryBase):
|
||||
d = defaultdict(int)
|
||||
wts = self.tw.weights(tks, preprocess=False)
|
||||
for i, (t, c) in enumerate(wts):
|
||||
d[t] += c
|
||||
d[t] += c * 0.4
|
||||
if i+1 < len(wts):
|
||||
_t, _c = wts[i+1]
|
||||
d[t+_t] += max(c, _c) * 0.6
|
||||
return d
|
||||
|
||||
atks = to_dict(atks)
|
||||
|
||||
Reference in New Issue
Block a user