Fix: cannot parse images (#11044)

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/11043

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Stephen Hu
2025-11-10 09:31:19 +08:00
committed by GitHub
parent 4cdaa77545
commit 660386d3b5

View File

@ -114,6 +114,28 @@ class Base(ABC):
yield tk_count
def image2base64_rawvalue(self, image):
    """Return ``image`` encoded as a bare base64 string (no data-URL header).

    This is a regular instance method, not a ``@staticmethod``: the
    signature takes ``self``, so with the decorator a bound call such as
    ``self.image2base64_rawvalue(image)`` supplied only one of the two
    required arguments and raised ``TypeError``.

    Args:
        image: Raw ``bytes``/``bytearray``, a ``BytesIO``, or any object
            exposing ``save(fp, format=...)`` (presumably a PIL image;
            confirm against callers).

    Returns:
        The base64 text of the image data, decoded as UTF-8.
    """
    if isinstance(image, (bytes, bytearray)):
        return base64.b64encode(image).decode("utf-8")
    if isinstance(image, BytesIO):
        return base64.b64encode(image.getvalue()).decode("utf-8")
    with BytesIO() as buffered:
        try:
            image.save(buffered, format="JPEG")
        except Exception:
            # Deliberate best-effort fallback: if the JPEG save fails for any
            # reason, discard whatever was partially written and retry as PNG.
            buffered.seek(0)
            buffered.truncate()
            image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
@staticmethod
def image2base64(image):
# Return a data URL with the correct MIME to avoid provider mismatches
@ -614,7 +636,15 @@ class GeminiCV(Base):
if self.lang.lower() == "chinese"
else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
)
b64 = self.image2base64(image)
if image is bytes:
with BytesIO(image) as bio:
with open(bio) as img:
input = [prompt, img]
res = self.model.generate_content(input)
return res.text, total_token_count_from_response(res)
else:
b64 = self.image2base64_rawvalue(image)
with BytesIO(base64.b64decode(b64)) as bio:
with open(bio) as img:
input = [prompt, img]
@ -623,9 +653,16 @@ class GeminiCV(Base):
def describe_with_prompt(self, image, prompt=None):
from PIL.Image import open
b64 = self.image2base64(image)
vision_prompt = prompt if prompt else vision_llm_describe_prompt()
if image is bytes:
with BytesIO(image) as bio:
with open(bio) as img:
input = [vision_prompt, img]
res = self.model.generate_content(input)
return res.text, total_token_count_from_response(res)
else:
b64 = self.image2base64_rawvalue(image)
with BytesIO(base64.b64decode(b64)) as bio:
with open(bio) as img:
input = [vision_prompt, img]