Fix: float conversion exception. (#6197)

### What problem does this PR solve?

Addresses #6177.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-03-18 11:13:44 +08:00
committed by GitHub
parent 222a2c8fa5
commit 1333d3c02a
5 changed files with 27 additions and 15 deletions

View File

@@ -30,6 +30,8 @@ from docx import Document
from PIL import Image
from markdown import markdown
from rag.utils import get_float
class Excel(ExcelParser):
def __call__(self, fnm, binary=None, callback=None):
@@ -126,8 +128,8 @@ class Pdf(PdfParser):
section, line_tag = box['text'], self._line_tag(box, zoomin)
has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list)
last_box, last_index, last_bull = box, index, has_bull
line_pn = float(line_tag.lstrip('@@').split('\t')[0])
line_top = float(line_tag.rstrip('##').split('\t')[3])
line_pn = get_float(line_tag.lstrip('@@').split('\t')[0])
line_top = get_float(line_tag.rstrip('##').split('\t')[3])
tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index)
if not has_bull: # No question bullet
if not last_q: