Add task module, and pipeline the task and every parser (#49)

This commit is contained in:
KevinHuSh
2024-01-31 19:57:45 +08:00
committed by GitHub
parent af3ef26977
commit 6224edcd1b
15 changed files with 369 additions and 237 deletions

View File

@ -1559,6 +1559,15 @@ class HuParser:
return "\n\n".join(res)
@staticmethod
def total_page_number(fnm, binary=None):
    """Return the number of pages in a PDF.

    The document is opened with pdfplumber first; if that raises for any
    reason, PyMuPDF (``fitz``) is used as a fallback.

    Args:
        fnm: Location of the PDF, passed to ``pdfplumber.open`` /
            ``fitz.open`` when ``binary`` is empty.
        binary: Optional raw PDF bytes. When truthy, the document is read
            from these bytes and ``fnm`` is not opened.

    Returns:
        The page count as an int.

    Raises:
        Exception: whatever the PyMuPDF fallback raises when it also
            cannot open the document.
    """
    try:
        source = fnm if not binary else BytesIO(binary)
        # Context manager closes the underlying file handle once counted.
        with pdfplumber.open(source) as pdf:
            return len(pdf.pages)
    except Exception:
        # Deliberate best-effort: any pdfplumber failure falls through to
        # PyMuPDF. The in-memory case must hand over the bytes themselves
        # (``binary``), not the file name, as the stream.
        if not binary:
            doc = fitz.open(fnm)
        else:
            doc = fitz.open(stream=binary, filetype="pdf")
        with doc:
            return len(doc)
def __images__(self, fnm, zoomin=3, page_from=0, page_to=299):
self.lefted_chars = []
self.mean_height = []