mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Fix: cannot parse images (#11044)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/11043

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -114,6 +114,28 @@ class Base(ABC):
|
||||
|
||||
yield tk_count
|
||||
|
||||
def image2base64_rawvalue(self, image):
    """Return ``image`` as a bare base64 string (no data-URL header).

    This is a regular instance method: the previous ``@staticmethod``
    decorator combined with a ``self`` parameter made
    ``self.image2base64_rawvalue(image)`` fail with a missing-argument
    ``TypeError``, because the image was bound to ``self``.

    Args:
        image: One of
            * ``bytes`` - encoded as-is;
            * ``BytesIO`` - its full buffer content is encoded;
            * any other object exposing ``save(fp, format=...)``
              (presumably a PIL image - confirm against callers), which is
              serialized as JPEG, falling back to PNG if that fails.

    Returns:
        str: The base64 text of the image data, decoded as UTF-8.
    """
    if isinstance(image, bytes):
        return base64.b64encode(image).decode("utf-8")

    if isinstance(image, BytesIO):
        # getvalue() returns the whole buffer regardless of the current
        # stream position.
        return base64.b64encode(image.getvalue()).decode("utf-8")

    with BytesIO() as buffered:
        try:
            image.save(buffered, format="JPEG")
        except Exception:
            # Deliberate best-effort fallback: a failed JPEG save may have
            # left partial output behind, so wipe the buffer before
            # retrying as PNG.
            buffered.seek(0)
            buffered.truncate()
            image.save(buffered, format="PNG")
        data = buffered.getvalue()

    return base64.b64encode(data).decode("utf-8")
|
||||
|
||||
@staticmethod
|
||||
def image2base64(image):
|
||||
# Return a data URL with the correct MIME to avoid provider mismatches
|
||||
@ -614,23 +636,38 @@ class GeminiCV(Base):
|
||||
if self.lang.lower() == "chinese"
|
||||
else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
|
||||
)
|
||||
b64 = self.image2base64(image)
|
||||
with BytesIO(base64.b64decode(b64)) as bio:
|
||||
with open(bio) as img:
|
||||
input = [prompt, img]
|
||||
res = self.model.generate_content(input)
|
||||
return res.text, total_token_count_from_response(res)
|
||||
|
||||
if image is bytes:
|
||||
with BytesIO(image) as bio:
|
||||
with open(bio) as img:
|
||||
input = [prompt, img]
|
||||
res = self.model.generate_content(input)
|
||||
return res.text, total_token_count_from_response(res)
|
||||
else:
|
||||
b64 = self.image2base64_rawvalue(image)
|
||||
with BytesIO(base64.b64decode(b64)) as bio:
|
||||
with open(bio) as img:
|
||||
input = [prompt, img]
|
||||
res = self.model.generate_content(input)
|
||||
return res.text, total_token_count_from_response(res)
|
||||
|
||||
def describe_with_prompt(self, image, prompt=None):
    """Ask the Gemini model to describe ``image`` using a vision prompt.

    Args:
        image: Raw image ``bytes``, or any object accepted by
            ``self.image2base64_rawvalue``.
        prompt: Instruction text sent together with the image. When falsy,
            the result of ``vision_llm_describe_prompt()`` is used instead.

    Returns:
        tuple: ``(res.text, total_token_count_from_response(res))`` for the
        response returned by ``self.model.generate_content``.
    """
    # Import the module instead of its ``open`` function so the builtin
    # ``open`` is not shadowed inside this method.
    from PIL import Image

    vision_prompt = prompt if prompt else vision_llm_describe_prompt()

    # ``image is bytes`` compared the value with the ``bytes`` type object
    # itself and was never true for real byte data; ``isinstance`` is the
    # intended check.
    if isinstance(image, bytes):
        raw = image
    else:
        # The raw-value helper yields plain base64 without a data-URL
        # header, so it can be decoded directly back into image bytes.
        raw = base64.b64decode(self.image2base64_rawvalue(image))

    with BytesIO(raw) as bio:
        with Image.open(bio) as img:
            res = self.model.generate_content([vision_prompt, img])
            return res.text, total_token_count_from_response(res)
|
||||
|
||||
|
||||
def chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs):
|
||||
|
||||
Reference in New Issue
Block a user