Refa: PARALLEL_DEVICES is a static parameter. (#6168)

### What problem does this PR solve?


### Type of change

- [x] Refactoring
This commit is contained in:
Kevin Hu
2025-03-17 16:49:54 +08:00
committed by GitHub
parent 45fe02c8b3
commit 3a99c2b5f4
6 changed files with 29 additions and 28 deletions

View File

@@ -128,8 +128,8 @@ class Docx(DocxParser):
class Pdf(PdfParser):
def __init__(self, parallel_devices = None):
super().__init__(parallel_devices)
def __init__(self):
super().__init__()
def __call__(self, filename, binary=None, from_page=0,
to_page=100000, zoomin=3, callback=None):
@@ -197,7 +197,7 @@ class Markdown(MarkdownParser):
def chunk(filename, binary=None, from_page=0, to_page=100000,
lang="Chinese", callback=None, parallel_devices=None, **kwargs):
lang="Chinese", callback=None, **kwargs):
"""
Supported file formats are docx, pdf, excel, txt.
This method apply the naive ways to chunk files.
@@ -237,7 +237,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
return res
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
pdf_parser = Pdf(parallel_devices)
pdf_parser = Pdf()
if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text":
pdf_parser = PlainParser()
sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page,