Refactor: treat MinerU as an OCR model (#11849)

### What problem does this PR solve?

 Treat MinerU as an OCR model.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-12-09 18:54:14 +08:00
committed by GitHub
parent 30377319d8
commit a94b3b9df2
9 changed files with 283 additions and 43 deletions

View File

@@ -39,7 +39,6 @@ from sklearn.metrics import silhouette_score
from common.file_utils import get_project_base_directory
from common.misc_utils import pip_install_torch
from deepdoc.vision import OCR, AscendLayoutRecognizer, LayoutRecognizer, Recognizer, TableStructureRecognizer
from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
from rag.nlp import rag_tokenizer
from rag.prompts.generator import vision_llm_describe_prompt
from common import settings
@@ -1455,6 +1454,8 @@ class VisionParser(RAGFlowPdfParser):
if pdf_page_num < start_page or pdf_page_num >= end_page:
continue
from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
text = picture_vision_llm_chunk(
binary=img_binary,
vision_model=self.vision_model,