mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-04 17:45:07 +08:00
Fix: docx parser output consistent (#12965)
### What problem does this PR solve? Fix: docx parser output consistent > File "/home/bxy/ragflow/rag/flow/parser/parser.py", line 506, in _word > sections, tbls = docx_parser(name, binary=blob) > ^^^^^^^^^^^^^^ > ValueError: too many values to unpack (expected 2) > ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -503,7 +503,13 @@ class Parser(ProcessBase):
|
||||
docx_parser = Docx()
|
||||
|
||||
if conf.get("output_format") == "json":
|
||||
sections, tbls = docx_parser(name, binary=blob)
|
||||
main_sections = docx_parser(name, binary=blob)
|
||||
sections = []
|
||||
tbls = []
|
||||
for text, image, html in main_sections:
|
||||
sections.append((text, image))
|
||||
tbls.append(((None, html), ""))
|
||||
|
||||
sections = [{"text": section[0], "image": section[1]} for section in sections if section]
|
||||
sections.extend([{"text": tb, "image": None, "doc_type_kwd": "table"} for ((_, tb), _) in tbls])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user