Fix: cannot parse images (#11044)

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/11043

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Stephen Hu
2025-11-10 09:31:19 +08:00
committed by GitHub
parent 4cdaa77545
commit 660386d3b5

View File

@ -114,6 +114,28 @@ class Base(ABC):
yield tk_count yield tk_count
@staticmethod
def image2base64_rawvalue(self, image):
# Return a base64 string without data URL header
if isinstance(image, bytes):
b64 = base64.b64encode(image).decode("utf-8")
return b64
if isinstance(image, BytesIO):
data = image.getvalue()
b64 = base64.b64encode(data).decode("utf-8")
return b64
with BytesIO() as buffered:
try:
image.save(buffered, format="JPEG")
except Exception:
# reset buffer before saving PNG
buffered.seek(0)
buffered.truncate()
image.save(buffered, format="PNG")
data = buffered.getvalue()
b64 = base64.b64encode(data).decode("utf-8")
return b64
@staticmethod @staticmethod
def image2base64(image): def image2base64(image):
# Return a data URL with the correct MIME to avoid provider mismatches # Return a data URL with the correct MIME to avoid provider mismatches
@ -614,23 +636,38 @@ class GeminiCV(Base):
if self.lang.lower() == "chinese" if self.lang.lower() == "chinese"
else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out." else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
) )
b64 = self.image2base64(image)
with BytesIO(base64.b64decode(b64)) as bio: if image is bytes:
with open(bio) as img: with BytesIO(image) as bio:
input = [prompt, img] with open(bio) as img:
res = self.model.generate_content(input) input = [prompt, img]
return res.text, total_token_count_from_response(res) res = self.model.generate_content(input)
return res.text, total_token_count_from_response(res)
else:
b64 = self.image2base64_rawvalue(image)
with BytesIO(base64.b64decode(b64)) as bio:
with open(bio) as img:
input = [prompt, img]
res = self.model.generate_content(input)
return res.text, total_token_count_from_response(res)
def describe_with_prompt(self, image, prompt=None):
    """Describe *image* with the model, using *prompt* or the default vision prompt.

    Args:
        image: Raw image ``bytes``, or any input accepted by
            ``self.image2base64_rawvalue``.
        prompt: Optional instruction text; when falsy, the result of
            ``vision_llm_describe_prompt()`` is used instead.

    Returns:
        tuple: ``(res.text, total_token_count_from_response(res))`` for the
        model response ``res``.
    """
    # Local import, as in the original; importing the module (not its `open`)
    # avoids shadowing the builtin `open` inside this method.
    from PIL import Image

    vision_prompt = prompt if prompt else vision_llm_describe_prompt()

    # `image is bytes` compared against the *type object* and was never true
    # for actual bytes data; isinstance is the correct check.
    if isinstance(image, bytes):
        raw = image
    else:
        raw = base64.b64decode(self.image2base64_rawvalue(image))

    with BytesIO(raw) as bio:
        with Image.open(bio) as img:
            contents = [vision_prompt, img]
            res = self.model.generate_content(contents)
            return res.text, total_token_count_from_response(res)
def chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): def chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs):