mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-03 02:55:29 +08:00
Fix: tokenizer issue. (#11902)
#11786

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -33,6 +33,22 @@ class RagTokenizer(infinity.rag_tokenizer.RagTokenizer):
|
||||
return super().fine_grained_tokenize(tks)
|
||||
|
||||
|
||||
def is_chinese(s):
    """Forward *s* to ``infinity.rag_tokenizer.is_chinese`` and return its result.

    This is a module-level pass-through so callers can use the helper from
    this module instead of importing it from ``infinity.rag_tokenizer``.
    """
    upstream_is_chinese = infinity.rag_tokenizer.is_chinese
    return upstream_is_chinese(s)
|
||||
|
||||
|
||||
def is_number(s):
    """Forward *s* to ``infinity.rag_tokenizer.is_number`` and return its result.

    This is a module-level pass-through so callers can use the helper from
    this module instead of importing it from ``infinity.rag_tokenizer``.
    """
    upstream_is_number = infinity.rag_tokenizer.is_number
    return upstream_is_number(s)
|
||||
|
||||
|
||||
def is_alphabet(s):
    """Forward *s* to ``infinity.rag_tokenizer.is_alphabet`` and return its result.

    This is a module-level pass-through so callers can use the helper from
    this module instead of importing it from ``infinity.rag_tokenizer``.
    """
    upstream_is_alphabet = infinity.rag_tokenizer.is_alphabet
    return upstream_is_alphabet(s)
|
||||
|
||||
|
||||
def naive_qie(txt):
    """Forward *txt* to ``infinity.rag_tokenizer.naive_qie`` and return its result.

    This is a module-level pass-through so callers can use the helper from
    this module instead of importing it from ``infinity.rag_tokenizer``.
    """
    upstream_naive_qie = infinity.rag_tokenizer.naive_qie
    return upstream_naive_qie(txt)
|
||||
|
||||
|
||||
# Module-level tokenizer instance, created once at import time.
tokenizer = RagTokenizer()

# Re-export the instance's bound methods under module-level names so callers
# can invoke them as plain functions.
tokenize = tokenizer.tokenize
fine_grained_tokenize = tokenizer.fine_grained_tokenize
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
@ -607,7 +608,7 @@ class Dealer:
|
||||
if not toc:
|
||||
return chunks
|
||||
|
||||
ids = relevant_chunks_with_toc(query, toc, chat_mdl, topn*2)
|
||||
ids = asyncio.run(relevant_chunks_with_toc(query, toc, chat_mdl, topn*2))
|
||||
if not ids:
|
||||
return chunks
|
||||
|
||||
|
||||
Reference in New Issue
Block a user