From e59458c36bd8b1492ced5d1509b4843cb92e15dd Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Tue, 28 Oct 2025 09:40:37 +0800
Subject: [PATCH] Fix: parsing excel with chartsheet & Clamp begin to a minimum
 of 0 to prevent negative indexing (#10819)

### What problem does this PR solve?

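Fix: parsing Excel workbooks that contain a chartsheet (#10815). Sheets whose
rows cannot be read are now skipped with a warning instead of aborting the
whole parse.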

Fix: clamp `begin` to a minimum of 0 so the similarity slice never starts at a
negative index (#10804).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 deepdoc/parser/excel_parser.py | 22 ++++++++++++++++++----
 rag/app/table.py               |  7 ++++++-
 rag/nlp/search.py              |  4 +++-
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py
index 4d0496a33..868fc5f41 100644
--- a/deepdoc/parser/excel_parser.py
+++ b/deepdoc/parser/excel_parser.py
@@ -123,7 +123,12 @@ class RAGFlowExcelParser:
 
         for sheetname in wb.sheetnames:
             ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
+                rows = list(ws.rows)
+            except Exception as e:
+                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
+                continue
+
             if not rows:
                 continue
 
@@ -170,7 +175,11 @@ class RAGFlowExcelParser:
         res = []
         for sheetname in wb.sheetnames:
             ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
+                rows = list(ws.rows)
+            except Exception as e:
+                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
+                continue
             if not rows:
                 continue
             ti = list(rows[0])
@@ -193,9 +202,14 @@ class RAGFlowExcelParser:
         if fnm.split(".")[-1].lower().find("xls") >= 0:
             wb = RAGFlowExcelParser._load_excel_to_workbook(BytesIO(binary))
             total = 0
+            
             for sheetname in wb.sheetnames:
-                ws = wb[sheetname]
-                total += len(list(ws.rows))
+               try:
+                   ws = wb[sheetname]
+                   total += len(list(ws.rows))
+               except Exception as e:
+                   logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
+                   continue
             return total
 
         if fnm.split(".")[-1].lower() in ["csv", "txt"]:
diff --git a/rag/app/table.py b/rag/app/table.py
index b0c3e5bc2..7a21a738a 100644
--- a/rag/app/table.py
+++ b/rag/app/table.py
@@ -15,6 +15,7 @@
 #
 
 import copy
+import logging
 import re
 from io import BytesIO
 from xpinyin import Pinyin
@@ -44,7 +45,11 @@ class Excel(ExcelParser):
         rn = 0
         for sheetname in wb.sheetnames:
             ws = wb[sheetname]
-            rows = list(ws.rows)
+            try:
+                rows = list(ws.rows)
+            except Exception as e:
+                logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
+                continue
             if not rows:
                 continue
             headers, header_rows = self._parse_headers(ws, rows)
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 4256a638d..ecb22522f 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -395,7 +395,9 @@ class Dealer:
                 tsim = sim
                 vsim = sim
         # Already paginated in search function
-        begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size
+        max_pages = RERANK_LIMIT // page_size
+        page_index = (page % max_pages) - 1
+        begin = max(page_index * page_size, 0)
         sim = sim[begin : begin + page_size]
         sim_np = np.array(sim)
         idx = np.argsort(sim_np * -1)