Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
Synchronize with enterprise version (#4325)
### Type of change

- [x] Refactoring
This commit is contained in:
@@ -9,7 +9,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
||||
lang="Chinese", callback=None, **kwargs):
|
||||
parser_config = kwargs.get(
|
||||
"parser_config", {
|
||||
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
|
||||
"chunk_token_num": 512, "delimiter": "\n!?;。;!?", "layout_recognize": True})
|
||||
eng = lang.lower() == "english"
|
||||
|
||||
parser_config["layout_recognize"] = True
|
||||
@@ -29,4 +29,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
||||
doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
|
||||
chunks.extend(tokenize_chunks(sections, doc, eng))
|
||||
|
||||
return chunks
|
||||
return chunks
|
||||
|
||||
Reference in New Issue
Block a user