Fix: possible memory leaks close #5277 (#5500)

### What problem does this PR solve?

Closes #5277 by making sure the file is closed.

### Type of change

- [x] Performance Improvement

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
yihong
2025-03-03 10:26:45 +08:00
committed by GitHub
parent d6836444c9
commit 8a2542157f
4 changed files with 9 additions and 1 deletions

View File

@@ -950,7 +950,9 @@ class RAGFlowPdfParser:
try:
pdf = pdfplumber.open(
fnm) if not binary else pdfplumber.open(BytesIO(binary))
return len(pdf.pages)
total_page = len(pdf.pages)
pdf.close()
return total_page
except Exception:
logging.exception("total_page_number")
@@ -996,8 +998,11 @@ class RAGFlowPdfParser:
dfs(outlines, 0)
except Exception as e:
logging.warning(f"Outlines exception: {e}")
finally:
self.pdf.close()
if not self.outlines:
logging.warning("Miss outlines")
logging.debug("Images converted.")
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(