mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Split Excel file into different chunks (#847)
### What problem does this PR solve?

Split Excel files into different chunks.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -134,7 +134,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
excel_parser = ExcelParser()
|
||||
sections = [(excel_parser.html(binary), "")]
|
||||
sections = [(l, "") for l in excel_parser.html(binary) if l]
|
||||
|
||||
elif re.search(r"\.(txt|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
@@ -78,7 +78,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
excel_parser = ExcelParser()
|
||||
sections = [excel_parser.html(binary)]
|
||||
sections = excel_parser.html(binary , 10000000)
|
||||
|
||||
elif re.search(r"\.txt$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
Reference in New Issue
Block a user