fix plainPdf bugs (#152)

This commit is contained in:
KevinHuSh
2024-03-26 15:11:07 +08:00
committed by GitHub
parent 75f7c6da2f
commit da21320b88
13 changed files with 36 additions and 33 deletions

View File

@ -66,7 +66,7 @@ class Pdf(PdfParser):
class PlainPdf(PlainParser):
def __call__(self, filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
self.pdf = pdf2_read(filename if not binary else BytesIO(filename))
self.pdf = pdf2_read(filename if not binary else BytesIO(binary))
page_txt = []
for page in self.pdf.pages[from_page: to_page]:
page_txt.append(page.extract_text())