Feat: Vision Model Image Enhancement in Manual/Paper/Book/One chunkers (#10640)

### What problem does this PR solve?

Issue: [#7472](https://github.com/infiniflow/ragflow/issues/7472)

Change: Vision Model Image Enhancement in Manual chunker

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-02-01 08:05:07 +08:00 · 2025-10-21 09:36:27 +08:00
parent aaa4776657
commit 6ab96287c9
6 changed files with 71 additions and 46 deletions
--- a/rag/app/paper.py
+++ b/rag/app/paper.py
@@ -18,12 +18,12 @@ import logging
 import copy
 import re

+from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
 from api.db import ParserType
 from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
 from deepdoc.parser import PdfParser, PlainParser
 import numpy as np

-
 class Pdf(PdfParser):
    def __init__(self):
        self.model_speciess = ParserType.PAPER.value
@@ -160,6 +160,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            pdf_parser = Pdf()
            paper = pdf_parser(filename if not binary else binary,
                               from_page=from_page, to_page=to_page, callback=callback)
+        tbls=paper["tables"]
+        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
+        paper["tables"] = tbls
    else:
        raise NotImplementedError("file type not supported yet(pdf supported)")