mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: parsing excel with chartsheet & Clamp begin to a minimum of 0 to prevent negative indexing (#10819)
### What problem does this PR solve?

- Fix: parsing Excel files that contain a chartsheet (#10815)
- Fix: clamp `begin` to a minimum of 0 to prevent negative indexing (#10804)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -123,7 +123,12 @@ class RAGFlowExcelParser:
|
||||
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
try:
|
||||
rows = list(ws.rows)
|
||||
except Exception as e:
|
||||
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||
continue
|
||||
|
||||
if not rows:
|
||||
continue
|
||||
|
||||
@ -170,7 +175,11 @@ class RAGFlowExcelParser:
|
||||
res = []
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
try:
|
||||
rows = list(ws.rows)
|
||||
except Exception as e:
|
||||
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||
continue
|
||||
if not rows:
|
||||
continue
|
||||
ti = list(rows[0])
|
||||
@ -193,9 +202,14 @@ class RAGFlowExcelParser:
|
||||
if fnm.split(".")[-1].lower().find("xls") >= 0:
|
||||
wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
|
||||
total = 0
|
||||
|
||||
for sheetname in wb.sheetnames:
|
||||
try:
|
||||
ws = wb[sheetname]
|
||||
total += len(list(ws.rows))
|
||||
except Exception as e:
|
||||
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||
continue
|
||||
return total
|
||||
|
||||
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
import copy
|
||||
import logging
|
||||
import re
|
||||
from io import BytesIO
|
||||
from xpinyin import Pinyin
|
||||
@ -44,7 +45,11 @@ class Excel(ExcelParser):
|
||||
rn = 0
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
try:
|
||||
rows = list(ws.rows)
|
||||
except Exception as e:
|
||||
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||
continue
|
||||
if not rows:
|
||||
continue
|
||||
headers, header_rows = self._parse_headers(ws, rows)
|
||||
|
||||
@ -395,7 +395,9 @@ class Dealer:
|
||||
tsim = sim
|
||||
vsim = sim
|
||||
# Already paginated in search function
|
||||
begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size
|
||||
max_pages = RERANK_LIMIT // page_size
|
||||
page_index = (page % max_pages) - 1
|
||||
begin = max(page_index * page_size, 0)
|
||||
sim = sim[begin : begin + page_size]
|
||||
sim_np = np.array(sim)
|
||||
idx = np.argsort(sim_np * -1)
|
||||
|
||||
Reference in New Issue
Block a user