Fix: parse Excel workbooks that contain a chartsheet, and clamp `begin` to a minimum of 0 to prevent negative indexing (#10819)

### What problem does this PR solve?

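Fix: parsing Excel workbooks that contain a chartsheet #10815. Accessing `ws.rows` on such a sheet raises an error, so the sheet is now skipped with a logged warning instead of aborting the whole parse.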

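Fix: Clamp `begin` to a minimum of 0 to prevent negative indexing #10804. When `page` is an exact multiple of `RERANK_LIMIT // page_size`, the computed page index is -1, which previously made `begin` negative and sliced `sim` from the end.

### Type of change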

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Billy Bao
2025-10-28 09:40:37 +08:00
committed by GitHub
parent 850e119a81
commit e59458c36b
3 changed files with 27 additions and 6 deletions

View File

@ -123,7 +123,12 @@ class RAGFlowExcelParser:
for sheetname in wb.sheetnames: for sheetname in wb.sheetnames:
ws = wb[sheetname] ws = wb[sheetname]
rows = list(ws.rows) try:
rows = list(ws.rows)
except Exception as e:
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
continue
if not rows: if not rows:
continue continue
@ -170,7 +175,11 @@ class RAGFlowExcelParser:
res = [] res = []
for sheetname in wb.sheetnames: for sheetname in wb.sheetnames:
ws = wb[sheetname] ws = wb[sheetname]
rows = list(ws.rows) try:
rows = list(ws.rows)
except Exception as e:
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
continue
if not rows: if not rows:
continue continue
ti = list(rows[0]) ti = list(rows[0])
@ -193,9 +202,14 @@ class RAGFlowExcelParser:
if fnm.split(".")[-1].lower().find("xls") >= 0: if fnm.split(".")[-1].lower().find("xls") >= 0:
wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary)) wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
total = 0 total = 0
for sheetname in wb.sheetnames: for sheetname in wb.sheetnames:
ws = wb[sheetname] try:
total += len(list(ws.rows)) ws = wb[sheetname]
total += len(list(ws.rows))
except Exception as e:
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
continue
return total return total
if fnm.split(".")[-1].lower() in ["csv", "txt"]: if fnm.split(".")[-1].lower() in ["csv", "txt"]:

View File

@ -15,6 +15,7 @@
# #
import copy import copy
import logging
import re import re
from io import BytesIO from io import BytesIO
from xpinyin import Pinyin from xpinyin import Pinyin
@ -44,7 +45,11 @@ class Excel(ExcelParser):
rn = 0 rn = 0
for sheetname in wb.sheetnames: for sheetname in wb.sheetnames:
ws = wb[sheetname] ws = wb[sheetname]
rows = list(ws.rows) try:
rows = list(ws.rows)
except Exception as e:
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
continue
if not rows: if not rows:
continue continue
headers, header_rows = self._parse_headers(ws, rows) headers, header_rows = self._parse_headers(ws, rows)

View File

@ -395,7 +395,9 @@ class Dealer:
tsim = sim tsim = sim
vsim = sim vsim = sim
# Already paginated in search function # Already paginated in search function
begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size max_pages = RERANK_LIMIT // page_size
page_index = (page % max_pages) - 1
begin = max(page_index * page_size, 0)
sim = sim[begin : begin + page_size] sim = sim[begin : begin + page_size]
sim_np = np.array(sim) sim_np = np.array(sim)
idx = np.argsort(sim_np * -1) idx = np.argsort(sim_np * -1)