refactor: close the BytesIO stream opened by the Docling parser (#12280)

### What problem does this PR solve?

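When `DoclingParser.__images__` receives raw bytes instead of a file path, it wraps them in a `BytesIO` before handing them to `pdfplumber.open`. This PR keeps a reference to that `BytesIO` and closes it in a `finally` clause, so the in-memory stream is released whether or not opening or rendering the PDF succeeds.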

### Type of change

- [x] Refactoring
This commit is contained in:
Stephen Hu
2025-12-29 13:33:27 +08:00
committed by GitHub
parent 082c2ed11c
commit 0b5d1ebefa

View File

@@ -78,14 +78,21 @@ class DoclingParser(RAGFlowPdfParser):
def __images__(self, fnm, zoomin: int = 1, page_from=0, page_to=600, callback=None): def __images__(self, fnm, zoomin: int = 1, page_from=0, page_to=600, callback=None):
self.page_from = page_from self.page_from = page_from
self.page_to = page_to self.page_to = page_to
bytes_io = None
try: try:
opener = pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(BytesIO(fnm)) if not isinstance(fnm, (str, PathLike)):
bytes_io = BytesIO(fnm)
opener = pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(bytes_io)
with opener as pdf: with opener as pdf:
pages = pdf.pages[page_from:page_to] pages = pdf.pages[page_from:page_to]
self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for p in pages] self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for p in pages]
except Exception as e: except Exception as e:
self.page_images = [] self.page_images = []
self.logger.exception(e) self.logger.exception(e)
finally:
if bytes_io:
bytes_io.close()
def _make_line_tag(self,bbox: _BBox) -> str: def _make_line_tag(self,bbox: _BBox) -> str:
if bbox is None: if bbox is None: