mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: Excel2HTML can't support XLS(Excel 97-2003) (#10660)
### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/10602 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -54,8 +54,8 @@ class RAGFlowExcelParser:
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
try:
|
||||
df = pd.read_excel(file_like_object)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
dfs = pd.read_excel(file_like_object, sheet_name=None)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(dfs)
|
||||
except Exception as ex:
|
||||
logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
|
||||
file_like_object.seek(0)
|
||||
@ -75,6 +75,10 @@ class RAGFlowExcelParser:
|
||||
|
||||
@staticmethod
|
||||
def _dataframe_to_workbook(df):
|
||||
# if contains multiple sheets use _dataframes_to_workbook
|
||||
if isinstance(df, dict) and len(df) > 1:
|
||||
return RAGFlowExcelParser._dataframes_to_workbook(df)
|
||||
|
||||
df = RAGFlowExcelParser._clean_dataframe(df)
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
@ -89,6 +93,22 @@ class RAGFlowExcelParser:
|
||||
|
||||
return wb
|
||||
|
||||
@staticmethod
|
||||
def _dataframes_to_workbook(dfs: dict):
|
||||
wb = Workbook()
|
||||
default_sheet = wb.active
|
||||
wb.remove(default_sheet)
|
||||
|
||||
for sheet_name, df in dfs.items():
|
||||
df = RAGFlowExcelParser._clean_dataframe(df)
|
||||
ws = wb.create_sheet(title=sheet_name)
|
||||
for col_num, column_name in enumerate(df.columns, 1):
|
||||
ws.cell(row=1, column=col_num, value=column_name)
|
||||
for row_num, row in enumerate(df.values, 2):
|
||||
for col_num, value in enumerate(row, 1):
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
return wb
|
||||
|
||||
def html(self, fnm, chunk_rows=256):
|
||||
from html import escape
|
||||
|
||||
|
||||
Reference in New Issue
Block a user