Refactor: Use re.compile for the weights method (#9929)

### What problem does this PR solve?

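Precompile the regular expressions used by `Dealer.weights` with `re.compile`, and reuse the compiled patterns in its `ner`, `freq`, and `df` helpers instead of passing pattern strings to `re.match` on every call.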

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
Stephen Hu
2025-09-05 12:29:44 +08:00
committed by GitHub
parent 677c99b090
commit 4e16936fa4

View File

@ -160,15 +160,15 @@ class Dealer:
return tks return tks
def weights(self, tks, preprocess=True): def weights(self, tks, preprocess=True):
def skill(t): num_pattern = re.compile(r"[0-9,.]{2,}$")
if t not in self.sk: short_letter_pattern = re.compile(r"[a-z]{1,2}$")
return 1 num_space_pattern = re.compile(r"[0-9. -]{2,}$")
return 6 letter_pattern = re.compile(r"[a-z. -]+$")
def ner(t): def ner(t):
if re.match(r"[0-9,.]{2,}$", t): if num_pattern.match(t):
return 2 return 2
if re.match(r"[a-z]{1,2}$", t): if short_letter_pattern.match(t):
return 0.01 return 0.01
if not self.ne or t not in self.ne: if not self.ne or t not in self.ne:
return 1 return 1
@ -189,10 +189,10 @@ class Dealer:
return 1 return 1
def freq(t): def freq(t):
if re.match(r"[0-9. -]{2,}$", t): if num_space_pattern.match(t):
return 3 return 3
s = rag_tokenizer.freq(t) s = rag_tokenizer.freq(t)
if not s and re.match(r"[a-z. -]+$", t): if not s and letter_pattern.match(t):
return 300 return 300
if not s: if not s:
s = 0 s = 0
@ -207,11 +207,11 @@ class Dealer:
return max(s, 10) return max(s, 10)
def df(t): def df(t):
if re.match(r"[0-9. -]{2,}$", t): if num_space_pattern.match(t):
return 5 return 5
if t in self.df: if t in self.df:
return self.df[t] + 3 return self.df[t] + 3
elif re.match(r"[a-z. -]+$", t): elif letter_pattern.match(t):
return 300 return 300
elif len(t) >= 4: elif len(t) >= 4:
s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1] s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1]