mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: fix pdf_parser ignored in rag/app/naive.py (#11065)
### What problem does this PR solve? Fix: fix pdf_parser ignored in rag/app/naive.py #11000 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -23,7 +23,7 @@ from rag.app import naive
|
||||
from rag.nlp import rag_tokenizer, tokenize
|
||||
from deepdoc.parser import PdfParser, ExcelParser, HtmlParser
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_docx_wrapper
|
||||
from rag.app.naive import plaintext_parser, PARSERS
|
||||
from rag.app.naive import by_plaintext, PARSERS
|
||||
|
||||
class Pdf(PdfParser):
|
||||
def __call__(self, filename, binary=None, from_page=0,
|
||||
@ -88,10 +88,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text"
|
||||
|
||||
name = layout_recognizer.strip().lower()
|
||||
parser = PARSERS.get(name, plaintext_parser)
|
||||
parser = PARSERS.get(name, by_plaintext)
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
sections, tbls, _ = parser(
|
||||
sections, tbls, pdf_parser = parser(
|
||||
filename = filename,
|
||||
binary = binary,
|
||||
from_page = from_page,
|
||||
|
||||
Reference in New Issue
Block a user