Refactor code: rename `Hu*` parser classes to `RAGFlow*` (#583)

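### What problem does this PR solve?

Among other cleanups, this PR renames the document parser classes from the `Hu*` prefix (`HuParser`, `HuDocxParser`, `HuExcelParser`, `HuPptParser`) to the `RAGFlow*` prefix (`RAGFlowPdfParser`, `RAGFlowDocxParser`, `RAGFlowExcelParser`, `RAGFlowPptParser`). The package-level aliases (`PdfParser`, `DocxParser`, `ExcelParser`, `PptParser`) are unchanged.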

### Type of change

- [x] Refactoring
This commit is contained in:
KevinHuSh
2024-04-28 13:19:54 +08:00
committed by GitHub
parent aadb9cbec8
commit 9d60a84958
25 changed files with 48 additions and 525 deletions

View File

@@ -1,6 +1,6 @@
-from .pdf_parser import HuParser as PdfParser, PlainParser
-from .docx_parser import HuDocxParser as DocxParser
-from .excel_parser import HuExcelParser as ExcelParser
-from .ppt_parser import HuPptParser as PptParser
+from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
+from .docx_parser import RAGFlowDocxParser as DocxParser
+from .excel_parser import RAGFlowExcelParser as ExcelParser
+from .ppt_parser import RAGFlowPptParser as PptParser

View File

@@ -7,7 +7,7 @@ from rag.nlp import huqie
from io import BytesIO
-class HuDocxParser:
+class RAGFlowDocxParser:
def __extract_table_content(self, tb):
df = []

View File

@@ -6,7 +6,7 @@ from io import BytesIO
from rag.nlp import find_codec
-class HuExcelParser:
+class RAGFlowExcelParser:
def html(self, fnm):
if isinstance(fnm, str):
wb = load_workbook(fnm)
@@ -74,5 +74,5 @@ class HuExcelParser:
if __name__ == "__main__":
-psr = HuExcelParser()
+psr = RAGFlowExcelParser()
psr(sys.argv[1])

View File

@@ -23,7 +23,7 @@ from huggingface_hub import snapshot_download
logging.getLogger("pdfminer").setLevel(logging.WARNING)
-class HuParser:
+class RAGFlowPdfParser:
def __init__(self):
self.ocr = OCR()
if hasattr(self, "model_speciess"):

View File

@@ -14,7 +14,7 @@ from io import BytesIO
from pptx import Presentation
-class HuPptParser(object):
+class RAGFlowPptParser(object):
def __init__(self):
super().__init__()