Fix: docx parser output consistent (#12965)

### What problem does this PR solve?

Fix: docx parser output consistent

> File "/home/bxy/ragflow/rag/flow/parser/parser.py", line 506, in _word
>     sections, tbls = docx_parser(name, binary=blob)
>                      ^^^^^^^^^^^^^^
> ValueError: too many values to unpack (expected 2)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-02-04 01:25:07 +08:00 · 2026-02-03 15:36:58 +08:00
parent deeae8dba4
commit f11ca54e0e
3 changed files with 12 additions and 4 deletions
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -1168,6 +1168,8 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
        cks, result_images, tk_nums = [], [], []
        for text, image in zip(texts, images):
            text_str = text[0] if isinstance(text, tuple) else text
+            if text_str is None:
+                text_str = ""
            text_pos = text[1] if isinstance(text, tuple) and len(text) > 1 else ""
            split_sec = re.split(r"(%s)" % custom_pattern, text_str)
            for sub_sec in split_sec:
@@ -1187,11 +1189,11 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
    for text, image in zip(texts, images):
        # if text is tuple, unpack it
        if isinstance(text, tuple):
-            text_str = text[0]
+            text_str = text[0] if text[0] is not None else ""
            text_pos = text[1] if len(text) > 1 else ""
            add_chunk("\n" + text_str, image, text_pos)
        else:
-            add_chunk("\n" + text, image)
+            add_chunk("\n" + (text or ""), image)

    return cks, result_images