mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Fix:Error retrieving DOCX image (docx.image.exceptions.UnrecognizedImageError) (#10794)
### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/10776 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -80,12 +80,21 @@ class Docx(DocxParser):
|
||||
img = paragraph._element.xpath('.//pic:pic')
|
||||
if not img:
|
||||
return None
|
||||
img = img[0]
|
||||
embed = img.xpath('.//a:blip/@r:embed')[0]
|
||||
related_part = document.part.related_parts[embed]
|
||||
image = related_part.image
|
||||
image = Image.open(BytesIO(image.blob))
|
||||
return image
|
||||
try:
|
||||
img = img[0]
|
||||
embed = img.xpath('.//a:blip/@r:embed')[0]
|
||||
related_part = document.part.related_parts[embed]
|
||||
image = related_part.image
|
||||
if image is not None:
|
||||
image = Image.open(BytesIO(image.blob))
|
||||
return image
|
||||
elif related_part.blob is not None:
|
||||
image = Image.open(BytesIO(related_part.blob))
|
||||
return image
|
||||
else:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def concat_img(self, img1, img2):
|
||||
if img1 and not img2:
|
||||
|
||||
Reference in New Issue
Block a user