mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
fix: preserve correct MIME & unify data URL handling for vision inputs (relates #9248)

- Updated image2base64() to return a full data URL (data:image/<fmt>;base64,...) with accurate MIME
- Removed hardcoded image/jpeg in Base._image_prompt(); pass through data URLs and default raw base64 to image/png
- Set AnthropicCV._image_prompt() raw base64 media_type default to image/png
- Ensures MIME type matches actual image content, fixing “cannot process base64 image” errors on vLLM/OpenAI-compatible backends

### What problem does this PR solve?

This PR fixes a compatibility issue where base64-encoded images sent to vision models (e.g., vLLM/OpenAI-compatible backends) were rejected due to a mismatched MIME type or incorrect decoding.

Previously, the backend:

- Always converted raw base64 into data:image/jpeg;base64,... even if the actual content was PNG.
- In some cases, attempted base64 decoding on the full data URL string instead of the pure base64 part.

This caused strict validators such as vLLM to report errors like:

```
cannot process base64 image
failed to decode base64 string: illegal base64 data at input byte 0
```

With this fix, the MIME type in the request now matches the actual image content, and data URLs are correctly handled or passed through, ensuring vision models can decode and process images reliably.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -68,7 +68,7 @@ class Base(ABC):
|
||||
pmpt.append({
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{img}" if img[:4] != "data" else img
|
||||
"url": img if isinstance(img, str) and img.startswith("data:") else f"data:image/png;base64,{img}"
|
||||
}
|
||||
})
|
||||
return pmpt
|
||||
@ -109,16 +109,33 @@ class Base(ABC):
|
||||
|
||||
@staticmethod
|
||||
def image2base64(image):
|
||||
# Return a data URL with the correct MIME to avoid provider mismatches
|
||||
if isinstance(image, bytes):
|
||||
return base64.b64encode(image).decode("utf-8")
|
||||
# Best-effort magic number sniffing
|
||||
mime = "image/png"
|
||||
if len(image) >= 2 and image[0] == 0xFF and image[1] == 0xD8:
|
||||
mime = "image/jpeg"
|
||||
b64 = base64.b64encode(image).decode("utf-8")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
if isinstance(image, BytesIO):
|
||||
return base64.b64encode(image.getvalue()).decode("utf-8")
|
||||
data = image.getvalue()
|
||||
mime = "image/png"
|
||||
if len(data) >= 2 and data[0] == 0xFF and data[1] == 0xD8:
|
||||
mime = "image/jpeg"
|
||||
b64 = base64.b64encode(data).decode("utf-8")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
buffered = BytesIO()
|
||||
fmt = "JPEG"
|
||||
try:
|
||||
image.save(buffered, format="JPEG")
|
||||
except Exception:
|
||||
buffered = BytesIO() # reset buffer before saving PNG
|
||||
image.save(buffered, format="PNG")
|
||||
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||
fmt = "PNG"
|
||||
data = buffered.getvalue()
|
||||
b64 = base64.b64encode(data).decode("utf-8")
|
||||
mime = f"image/{fmt.lower()}"
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
def prompt(self, b64):
|
||||
return [
|
||||
@ -674,8 +691,8 @@ class AnthropicCV(Base):
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": "image/jpeg" if img[:4] != "data" else img.split(":")[1].split(";")[0],
|
||||
"data": img if img[:4] != "data" else img.split(",")[1]
|
||||
"media_type": (img.split(":")[1].split(";")[0] if isinstance(img, str) and img[:4] == "data" else "image/png"),
|
||||
"data": (img.split(",")[1] if isinstance(img, str) and img[:4] == "data" else img)
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user