mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
add rerank model (#969)
### What problem does this PR solve?

feat: add rerank models to the project #724 #162

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -241,11 +241,14 @@ class RagTokenizer:
|
||||
|
||||
return self.score_(res[::-1])
|
||||
|
||||
def english_normalize_(self, tks):
    """Lemmatize and then stem the English-looking tokens in ``tks``.

    A token is normalized only when it is made up of ASCII letters,
    underscores and hyphens (the pattern's ``$`` anchor also tolerates a
    single trailing newline).  Every other token is passed through
    unchanged.

    Args:
        tks: Iterable of token strings.

    Returns:
        A new list holding one entry per input token, in the same order.
    """
    normalized = []
    for token in tks:
        if re.match(r"[a-zA-Z_-]+$", token) is None:
            # Not a plain alphabetic token: keep it verbatim.
            normalized.append(token)
            continue
        # Lemmatize first, then stem the lemma.
        lemma = self.lemmatizer.lemmatize(token)
        normalized.append(self.stemmer.stem(lemma))
    return normalized
|
||||
|
||||
def tokenize(self, line):
|
||||
line = self._strQ2B(line).lower()
|
||||
line = self._tradi2simp(line)
|
||||
zh_num = len([1 for c in line if is_chinese(c)])
|
||||
if zh_num < len(line) * 0.2:
|
||||
if zh_num == 0:
|
||||
return " ".join([self.stemmer.stem(self.lemmatizer.lemmatize(t)) for t in word_tokenize(line)])
|
||||
|
||||
arr = re.split(self.SPLIT_CHAR, line)
|
||||
@ -293,7 +296,7 @@ class RagTokenizer:
|
||||
|
||||
i = e + 1
|
||||
|
||||
res = " ".join(res)
|
||||
res = " ".join(self.english_normalize_(res))
|
||||
if self.DEBUG:
|
||||
print("[TKS]", self.merge_(res))
|
||||
return self.merge_(res)
|
||||
@ -336,7 +339,7 @@ class RagTokenizer:
|
||||
|
||||
res.append(stk)
|
||||
|
||||
return " ".join(res)
|
||||
return " ".join(self.english_normalize_(res))
|
||||
|
||||
|
||||
def is_chinese(s):
|
||||
|
||||
Reference in New Issue
Block a user