Fix: Error retrieving DOCX image (docx.image.exceptions.UnrecognizedImageError) (#10794)

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/10776

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Stephen Hu
2025-10-27 13:23:16 +08:00
committed by GitHub
parent 7fbab750af
commit 56def59c2b

View File

@@ -80,12 +80,21 @@ class Docx(DocxParser):
img = paragraph._element.xpath('.//pic:pic') img = paragraph._element.xpath('.//pic:pic')
if not img: if not img:
return None return None
img = img[0] try:
embed = img.xpath('.//a:blip/@r:embed')[0] img = img[0]
related_part = document.part.related_parts[embed] embed = img.xpath('.//a:blip/@r:embed')[0]
image = related_part.image related_part = document.part.related_parts[embed]
image = Image.open(BytesIO(image.blob)) image = related_part.image
return image if image is not None:
image = Image.open(BytesIO(image.blob))
return image
elif related_part.blob is not None:
image = Image.open(BytesIO(related_part.blob))
return image
else:
return None
except Exception:
return None
def concat_img(self, img1, img2): def concat_img(self, img1, img2):
if img1 and not img2: if img1 and not img2: