mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Enable fine-grained tokenization of 3-character words (#2210)
### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
This commit is contained in:
@ -83,7 +83,7 @@ class EsQueryer:
|
|||||||
), tks
|
), tks
|
||||||
|
|
||||||
def need_fine_grained_tokenize(tk):
|
def need_fine_grained_tokenize(tk):
|
||||||
if len(tk) < 4:
|
if len(tk) < 3:
|
||||||
return False
|
return False
|
||||||
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
||||||
return False
|
return False
|
||||||
|
|||||||
Reference in New Issue
Block a user