Add 'One' chunk method (#137)

This commit is contained in:
KevinHuSh
2024-03-20 18:57:22 +08:00
committed by GitHub
parent fce14ee187
commit 5875c8ba08
11 changed files with 143 additions and 24 deletions

View File

@@ -84,6 +84,7 @@ def dispatch():
pages = PdfParser.total_page_number(r["name"], MINIO.get(r["kb_id"], r["location"]))
page_size = 5
if r["parser_id"] == "paper": page_size = 12
if r["parser_id"] == "one": page_size = 1000000000
for s,e in r["parser_config"].get("pages", [(0,100000)]):
e = min(e, pages)
for p in range(s, e, page_size):