Fix: tokenizer issue. (#11902)

### What problem does this PR solve?

#11786
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Kevin Hu
2025-12-11 17:38:17 +08:00
committed by GitHub
parent 22a51a3868
commit ea4a5cd665
17 changed files with 141 additions and 216 deletions


@@ -33,6 +33,22 @@ class RagTokenizer(infinity.rag_tokenizer.RagTokenizer):
        return super().fine_grained_tokenize(tks)

# Module-level helpers that delegate to infinity's rag_tokenizer so callers
# can import them as plain functions.
def is_chinese(s):
    return infinity.rag_tokenizer.is_chinese(s)

def is_number(s):
    return infinity.rag_tokenizer.is_number(s)

def is_alphabet(s):
    return infinity.rag_tokenizer.is_alphabet(s)

def naive_qie(txt):
    return infinity.rag_tokenizer.naive_qie(txt)

# Shared tokenizer instance and module-level function aliases.
tokenizer = RagTokenizer()
tokenize = tokenizer.tokenize
fine_grained_tokenize = tokenizer.fine_grained_tokenize
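
For reference, a minimal usage sketch of the module-level helpers exposed by this hunk. The import path `rag.nlp.rag_tokenizer` and the sample strings are assumptions for illustration and are not part of the diff.

```python
# Usage sketch (assumed import path; the hunk above does not show the file name).
from rag.nlp import rag_tokenizer

# tokenize / fine_grained_tokenize are aliases bound to a shared RagTokenizer
# instance, so callers do not need to construct a tokenizer themselves.
tks = rag_tokenizer.tokenize("RAGFlow is an open-source RAG engine 检索增强生成")
fine = rag_tokenizer.fine_grained_tokenize(tks)
print(tks)
print(fine)

# The character-class helpers proxy straight through to infinity's tokenizer;
# expected results are shown for single characters (actual behavior follows infinity).
print(rag_tokenizer.is_chinese("中"))   # expected True for a CJK character
print(rag_tokenizer.is_number("7"))     # expected True for a digit
print(rag_tokenizer.is_alphabet("a"))   # expected True for a Latin letter
```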