add sql to naive parser (#1908)

### What problem does this PR solve?


### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2024-08-12 15:29:33 +08:00
committed by GitHub
parent cd861e3653
commit cafdee536f
3 changed files with 14 additions and 8 deletions

View File

@ -224,9 +224,11 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
excel_parser = ExcelParser()
sections = [(l, "") for l in excel_parser.html(binary) if l]
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE):
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections = TxtParser()(filename,binary,parser_config.get("chunk_token_num", 128))
sections = TxtParser()(filename,binary,
parser_config.get("chunk_token_num", 128),
parser_config.get("delimiter", "\n!?;。;!?"))
callback(0.8, "Finish parsing.")
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):