Fix:Error retrieving DOCX image (docx.image.exceptions.UnrecognizedImageError) (#10794)

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/10776

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Stephen Hu
2025-10-27 13:23:16 +08:00
committed by GitHub
parent 7fbab750af
commit 56def59c2b

View File

@ -80,12 +80,21 @@ class Docx(DocxParser):
img = paragraph._element.xpath('.//pic:pic')
if not img:
return None
img = img[0]
embed = img.xpath('.//a:blip/@r:embed')[0]
related_part = document.part.related_parts[embed]
image = related_part.image
image = Image.open(BytesIO(image.blob))
return image
try:
img = img[0]
embed = img.xpath('.//a:blip/@r:embed')[0]
related_part = document.part.related_parts[embed]
image = related_part.image
if image is not None:
image = Image.open(BytesIO(image.blob))
return image
elif related_part.blob is not None:
image = Image.open(BytesIO(related_part.blob))
return image
else:
return None
except Exception:
return None
def concat_img(self, img1, img2):
if img1 and not img2: