From 56def59c2b8889122275b1cf65384e726d5b8c5c Mon Sep 17 00:00:00 2001 From: Stephen Hu <812791840@qq.com> Date: Mon, 27 Oct 2025 13:23:16 +0800 Subject: [PATCH] Fix:Error retrieving DOCX image (docx.image.exceptions.UnrecognizedImageError) (#10794) ### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/10776 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Kevin Hu --- rag/app/manual.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/rag/app/manual.py b/rag/app/manual.py index 32a38ed39..a433a10e2 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -80,12 +80,21 @@ class Docx(DocxParser): img = paragraph._element.xpath('.//pic:pic') if not img: return None - img = img[0] - embed = img.xpath('.//a:blip/@r:embed')[0] - related_part = document.part.related_parts[embed] - image = related_part.image - image = Image.open(BytesIO(image.blob)) - return image + try: + img = img[0] + embed = img.xpath('.//a:blip/@r:embed')[0] + related_part = document.part.related_parts[embed] + image = related_part.image + if image is not None: + image = Image.open(BytesIO(image.blob)) + return image + elif related_part.blob is not None: + image = Image.open(BytesIO(related_part.blob)) + return image + else: + return None + except Exception: + return None def concat_img(self, img1, img2): if img1 and not img2: