Fix: parsing excel with chartsheet & Clamp begin to a minimum of 0 to prevent negative indexing (#10819)

### What problem does this PR solve?

- Fix: parsing Excel files that contain a chartsheet (#10815)
- Fix: clamp `begin` to a minimum of 0 to prevent negative indexing (#10804)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-02-01 16:15:07 +08:00 · 2025-10-28 09:40:37 +08:00
parent 850e119a81
commit e59458c36b
3 changed files with 27 additions and 6 deletions
--- a/deepdoc/parser/excel_parser.py
+++ b/deepdoc/parser/excel_parser.py
@@ -123,7 +123,12 @@ class RAGFlowExcelParser:
        for sheetname in wb.sheetnames:
            ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
                rows = list(ws.rows)
            except Exception as e:
                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
                continue
            if not rows:
                continue
@@ -170,7 +175,11 @@ class RAGFlowExcelParser:
        res = []
        for sheetname in wb.sheetnames:
            ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
                rows = list(ws.rows)
            except Exception as e:
                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
                continue
            if not rows:
                continue
            ti = list(rows[0])
@@ -193,9 +202,14 @@ class RAGFlowExcelParser:
        if fnm.split(".")[-1].lower().find("xls") >= 0:
            wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
            total = 0
            for sheetname in wb.sheetnames:
-                ws = wb[sheetname]
+               try:
-                total += len(list(ws.rows))
+                   ws = wb[sheetname]
                   total += len(list(ws.rows))
               except Exception as e:
                   logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
                   continue
            return total
        if fnm.split(".")[-1].lower() in ["csv", "txt"]:
--- a/rag/app/table.py
+++ b/rag/app/table.py
@@ -15,6 +15,7 @@
 #
 import copy
 import logging
 import re
 from io import BytesIO
 from xpinyin import Pinyin
@@ -44,7 +45,11 @@ class Excel(ExcelParser):
        rn = 0
        for sheetname in wb.sheetnames:
            ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
                rows = list(ws.rows)
            except Exception as e:
                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
                continue
            if not rows:
                continue
            headers, header_rows = self._parse_headers(ws, rows)
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -395,7 +395,9 @@ class Dealer:
                tsim = sim
                vsim = sim
        # Already paginated in search function
-        begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size
+        max_pages = RERANK_LIMIT // page_size
        page_index = (page % max_pages) - 1
        begin = max(page_index * page_size, 0)
        sim = sim[begin : begin + page_size]
        sim_np = np.array(sim)
        idx = np.argsort(sim_np * -1)