From 62d35b1b73aa967c9bbc2cf9a6613612fbe579b6 Mon Sep 17 00:00:00 2001 From: Lynn Date: Thu, 18 Sep 2025 16:28:03 +0800 Subject: [PATCH] Fix: handle zero (#10149) ### What problem does this PR solve? Handle a zero or NaN median column width in `_naive_vertical_merge` by falling back to `self.mean_width[0]` before the column count is calculated. #10125 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- deepdoc/parser/pdf_parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index dea0a93ef..6311ecc7f 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -402,9 +402,12 @@ class RAGFlowPdfParser: self.boxes = bxs def _naive_vertical_merge(self, zoomin=3): + import math bxs = Recognizer.sort_Y_firstly(self.boxes, np.median(self.mean_height) / 3) column_width = np.median([b["x1"] - b["x0"] for b in self.boxes]) + if not column_width or math.isnan(column_width): + column_width = self.mean_width[0] self.column_num = int(self.page_images[0].size[0] / zoomin / column_width) if column_width < self.page_images[0].size[0] / zoomin / self.column_num: logging.info("Multi-column................... {} {}".format(column_width, self.page_images[0].size[0] / zoomin / self.column_num))