Feat: add CSV file parsing support (#5989)

### What problem does this PR solve?

Add CSV file parsing support. Related issues: #4552, #5849, #5870.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-03-12 19:20:50 +08:00
committed by GitHub
parent d660f6b9a5
commit 7cd37c37cd
2 changed files with 43 additions and 18 deletions

View File

@@ -240,7 +240,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
callback=callback)
res = tokenize_table(tables, doc, is_english)
-elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
+elif re.search(r"\.(csv|xlsx?)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
excel_parser = ExcelParser()
if parser_config.get("html4excel"):
@@ -307,9 +307,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
if __name__ == "__main__":
import sys
def dummy(prog=None, msg=""):
pass
chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy)