mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: parsing excel with chartsheet & Clamp begin to a minimum of 0 to prevent negative indexing (#10819)
### What problem does this PR solve?

- Fix: parsing Excel with chartsheet (#10815)
- Fix: Clamp `begin` to a minimum of 0 to prevent negative indexing (#10804)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -123,7 +123,12 @@ class RAGFlowExcelParser:
|
|||||||
|
|
||||||
for sheetname in wb.sheetnames:
|
for sheetname in wb.sheetnames:
|
||||||
ws = wb[sheetname]
|
ws = wb[sheetname]
|
||||||
rows = list(ws.rows)
|
try:
|
||||||
|
rows = list(ws.rows)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
if not rows:
|
if not rows:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -170,7 +175,11 @@ class RAGFlowExcelParser:
|
|||||||
res = []
|
res = []
|
||||||
for sheetname in wb.sheetnames:
|
for sheetname in wb.sheetnames:
|
||||||
ws = wb[sheetname]
|
ws = wb[sheetname]
|
||||||
rows = list(ws.rows)
|
try:
|
||||||
|
rows = list(ws.rows)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||||
|
continue
|
||||||
if not rows:
|
if not rows:
|
||||||
continue
|
continue
|
||||||
ti = list(rows[0])
|
ti = list(rows[0])
|
||||||
@ -193,9 +202,14 @@ class RAGFlowExcelParser:
|
|||||||
if fnm.split(".")[-1].lower().find("xls") >= 0:
|
if fnm.split(".")[-1].lower().find("xls") >= 0:
|
||||||
wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
|
wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
|
||||||
total = 0
|
total = 0
|
||||||
|
|
||||||
for sheetname in wb.sheetnames:
|
for sheetname in wb.sheetnames:
|
||||||
ws = wb[sheetname]
|
try:
|
||||||
total += len(list(ws.rows))
|
ws = wb[sheetname]
|
||||||
|
total += len(list(ws.rows))
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||||
|
continue
|
||||||
return total
|
return total
|
||||||
|
|
||||||
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
||||||
|
|||||||
@ -15,6 +15,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from xpinyin import Pinyin
|
from xpinyin import Pinyin
|
||||||
@ -44,7 +45,11 @@ class Excel(ExcelParser):
|
|||||||
rn = 0
|
rn = 0
|
||||||
for sheetname in wb.sheetnames:
|
for sheetname in wb.sheetnames:
|
||||||
ws = wb[sheetname]
|
ws = wb[sheetname]
|
||||||
rows = list(ws.rows)
|
try:
|
||||||
|
rows = list(ws.rows)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
|
||||||
|
continue
|
||||||
if not rows:
|
if not rows:
|
||||||
continue
|
continue
|
||||||
headers, header_rows = self._parse_headers(ws, rows)
|
headers, header_rows = self._parse_headers(ws, rows)
|
||||||
|
|||||||
@ -395,7 +395,9 @@ class Dealer:
|
|||||||
tsim = sim
|
tsim = sim
|
||||||
vsim = sim
|
vsim = sim
|
||||||
# Already paginated in search function
|
# Already paginated in search function
|
||||||
begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size
|
max_pages = RERANK_LIMIT // page_size
|
||||||
|
page_index = (page % max_pages) - 1
|
||||||
|
begin = max(page_index * page_size, 0)
|
||||||
sim = sim[begin : begin + page_size]
|
sim = sim[begin : begin + page_size]
|
||||||
sim_np = np.array(sim)
|
sim_np = np.array(sim)
|
||||||
idx = np.argsort(sim_np * -1)
|
idx = np.argsort(sim_np * -1)
|
||||||
|
|||||||
Reference in New Issue
Block a user