mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refactor: Use re.compile for the weights method (#9929)
### What problem does this PR solve?

Use `re.compile` to precompile the regular expressions used by the `weights` method.

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
@ -160,15 +160,15 @@ class Dealer:
|
|||||||
return tks
|
return tks
|
||||||
|
|
||||||
def weights(self, tks, preprocess=True):
|
def weights(self, tks, preprocess=True):
|
||||||
def skill(t):
|
num_pattern = re.compile(r"[0-9,.]{2,}$")
|
||||||
if t not in self.sk:
|
short_letter_pattern = re.compile(r"[a-z]{1,2}$")
|
||||||
return 1
|
num_space_pattern = re.compile(r"[0-9. -]{2,}$")
|
||||||
return 6
|
letter_pattern = re.compile(r"[a-z. -]+$")
|
||||||
|
|
||||||
def ner(t):
|
def ner(t):
|
||||||
if re.match(r"[0-9,.]{2,}$", t):
|
if num_pattern.match(t):
|
||||||
return 2
|
return 2
|
||||||
if re.match(r"[a-z]{1,2}$", t):
|
if short_letter_pattern.match(t):
|
||||||
return 0.01
|
return 0.01
|
||||||
if not self.ne or t not in self.ne:
|
if not self.ne or t not in self.ne:
|
||||||
return 1
|
return 1
|
||||||
@ -189,10 +189,10 @@ class Dealer:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
def freq(t):
|
def freq(t):
|
||||||
if re.match(r"[0-9. -]{2,}$", t):
|
if num_space_pattern.match(t):
|
||||||
return 3
|
return 3
|
||||||
s = rag_tokenizer.freq(t)
|
s = rag_tokenizer.freq(t)
|
||||||
if not s and re.match(r"[a-z. -]+$", t):
|
if not s and letter_pattern.match(t):
|
||||||
return 300
|
return 300
|
||||||
if not s:
|
if not s:
|
||||||
s = 0
|
s = 0
|
||||||
@ -207,11 +207,11 @@ class Dealer:
|
|||||||
return max(s, 10)
|
return max(s, 10)
|
||||||
|
|
||||||
def df(t):
|
def df(t):
|
||||||
if re.match(r"[0-9. -]{2,}$", t):
|
if num_space_pattern.match(t):
|
||||||
return 5
|
return 5
|
||||||
if t in self.df:
|
if t in self.df:
|
||||||
return self.df[t] + 3
|
return self.df[t] + 3
|
||||||
elif re.match(r"[a-z. -]+$", t):
|
elif letter_pattern.match(t):
|
||||||
return 300
|
return 300
|
||||||
elif len(t) >= 4:
|
elif len(t) >= 4:
|
||||||
s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1]
|
s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1]
|
||||||
|
|||||||
Reference in New Issue
Block a user