mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Feat: add vision LLM PDF parser (#6173)
### What problem does this PR solve?

Add vision LLM PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -21,8 +21,9 @@ from PIL import Image
|
||||
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from rag.nlp import tokenize
|
||||
from deepdoc.vision import OCR
|
||||
from rag.nlp import tokenize
|
||||
from rag.utils import clean_markdown_block
|
||||
|
||||
ocr = OCR()
|
||||
|
||||
@ -57,3 +58,32 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
|
||||
callback(prog=-1, msg=str(e))
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def vision_llm_chunk(binary, vision_model, prompt=None, callback=None):
    """
    Render a single image to markdown text via a vision LLM (VLM).

    Args:
        binary: Image object exposing a PIL-style ``save(fp, format=...)``
            method (and, optionally, ``mode`` / ``convert``).
        vision_model: Object exposing
            ``describe_with_prompt(image_bytes, prompt)``.
        prompt: Passed through unchanged to ``describe_with_prompt``.
        callback: Optional progress reporter invoked as
            ``callback(prog, msg)``; on failure it receives ``-1`` and the
            error text. Defaults to a no-op.

    Returns:
        The cleaned model output prefixed with a newline, or an empty
        string when anything fails, so the return type is always ``str``.
    """
    callback = callback or (lambda prog, msg: None)

    try:
        img = binary
        # JPEG has no alpha channel or palette; Pillow refuses to write these
        # modes as JPEG, so normalise them to RGB before encoding.
        if getattr(img, "mode", None) in ("RGBA", "LA", "P", "PA"):
            img = img.convert("RGB")

        # The context manager releases the in-memory buffer deterministically.
        with io.BytesIO() as img_binary:
            img.save(img_binary, format="JPEG")
            jpeg_bytes = img_binary.getvalue()

        ans = clean_markdown_block(
            vision_model.describe_with_prompt(jpeg_bytes, prompt)
        )
        return "\n" + ans

    except Exception as e:
        # Best-effort boundary: report the failure and fall through to the
        # empty-string result instead of propagating.
        callback(-1, str(e))

    return ""
|
||||
|
||||
Reference in New Issue
Block a user