mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refactor parser code (#9042)
### What problem does this PR solve?

Refactor code

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -34,7 +34,7 @@ class RAGFlowExcelParser:
|
||||
file_like_object.seek(0)
|
||||
|
||||
if not (file_head.startswith(b'PK\x03\x04') or file_head.startswith(b'\xD0\xCF\x11\xE0')):
|
||||
logging.info("****wxy: Not an Excel file, converting CSV to Excel Workbook")
|
||||
logging.info("Not an Excel file, converting CSV to Excel Workbook")
|
||||
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
@ -42,18 +42,18 @@ class RAGFlowExcelParser:
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
|
||||
except Exception as e_csv:
|
||||
raise Exception(f"****wxy: Failed to parse CSV and convert to Excel Workbook: {e_csv}")
|
||||
raise Exception(f"Failed to parse CSV and convert to Excel Workbook: {e_csv}")
|
||||
|
||||
try:
|
||||
return load_workbook(file_like_object,data_only= True)
|
||||
except Exception as e:
|
||||
logging.info(f"****wxy: openpyxl load error: {e}, try pandas instead")
|
||||
logging.info(f"openpyxl load error: {e}, try pandas instead")
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
df = pd.read_excel(file_like_object)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
except Exception as e_pandas:
|
||||
raise Exception(f"****wxy: pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
|
||||
raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _dataframe_to_workbook(df):
|
||||
|
||||
Reference in New Issue
Block a user