Fix: handle zero (#10149)

### What problem does this PR solve?

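Handle a zero or NaN column width when calculating the number of columns in `_naive_vertical_merge`, so the division by `column_width` no longer fails.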
Related issue: #10125

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Lynn
2025-09-18 16:28:03 +08:00
committed by GitHub
parent 91b609447d
commit 62d35b1b73

View File

@@ -402,9 +402,12 @@ class RAGFlowPdfParser:
self.boxes = bxs self.boxes = bxs
def _naive_vertical_merge(self, zoomin=3): def _naive_vertical_merge(self, zoomin=3):
import math
bxs = Recognizer.sort_Y_firstly(self.boxes, np.median(self.mean_height) / 3) bxs = Recognizer.sort_Y_firstly(self.boxes, np.median(self.mean_height) / 3)
column_width = np.median([b["x1"] - b["x0"] for b in self.boxes]) column_width = np.median([b["x1"] - b["x0"] for b in self.boxes])
if not column_width or math.isnan(column_width):
column_width = self.mean_width[0]
self.column_num = int(self.page_images[0].size[0] / zoomin / column_width) self.column_num = int(self.page_images[0].size[0] / zoomin / column_width)
if column_width < self.page_images[0].size[0] / zoomin / self.column_num: if column_width < self.page_images[0].size[0] / zoomin / self.column_num:
logging.info("Multi-column................... {} {}".format(column_width, self.page_images[0].size[0] / zoomin / self.column_num)) logging.info("Multi-column................... {} {}".format(column_width, self.page_images[0].size[0] / zoomin / self.column_num))