mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
refactor code (#583)
### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
@ -1,6 +1,6 @@
|
||||
|
||||
|
||||
from .pdf_parser import HuParser as PdfParser, PlainParser
|
||||
from .docx_parser import HuDocxParser as DocxParser
|
||||
from .excel_parser import HuExcelParser as ExcelParser
|
||||
from .ppt_parser import HuPptParser as PptParser
|
||||
from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
|
||||
from .docx_parser import RAGFlowDocxParser as DocxParser
|
||||
from .excel_parser import RAGFlowExcelParser as ExcelParser
|
||||
from .ppt_parser import RAGFlowPptParser as PptParser
|
||||
|
||||
@ -7,7 +7,7 @@ from rag.nlp import huqie
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class HuDocxParser:
|
||||
class RAGFlowDocxParser:
|
||||
|
||||
def __extract_table_content(self, tb):
|
||||
df = []
|
||||
|
||||
@ -6,7 +6,7 @@ from io import BytesIO
|
||||
from rag.nlp import find_codec
|
||||
|
||||
|
||||
class HuExcelParser:
|
||||
class RAGFlowExcelParser:
|
||||
def html(self, fnm):
|
||||
if isinstance(fnm, str):
|
||||
wb = load_workbook(fnm)
|
||||
@ -74,5 +74,5 @@ class HuExcelParser:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
psr = HuExcelParser()
|
||||
psr = RAGFlowExcelParser()
|
||||
psr(sys.argv[1])
|
||||
|
||||
@ -23,7 +23,7 @@ from huggingface_hub import snapshot_download
|
||||
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class HuParser:
|
||||
class RAGFlowPdfParser:
|
||||
def __init__(self):
|
||||
self.ocr = OCR()
|
||||
if hasattr(self, "model_speciess"):
|
||||
|
||||
@ -14,7 +14,7 @@ from io import BytesIO
|
||||
from pptx import Presentation
|
||||
|
||||
|
||||
class HuPptParser(object):
|
||||
class RAGFlowPptParser(object):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user