Fix: incorrect image merging for naive markdown parser (#11520)

### What problem does this PR solve?

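Fix incorrect image merging in the naive markdown parser: in the JSON output path, each section now takes its image from the parser's per-section `section_images` result instead of re-extracting pictures from the section text. See #9349.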


[ragflow_readme.webm](https://github.com/user-attachments/assets/ca3f1e18-72b6-4a4c-80db-d03da9adf8dc)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Yongteng Lei
2025-11-25 19:54:06 +08:00
committed by GitHub
parent 5d0981d046
commit 7c20c964b4
3 changed files with 231 additions and 76 deletions

View File

@ -482,17 +482,25 @@ class Parser(ProcessBase):
self.set_output("output_format", conf["output_format"])
markdown_parser = naive_markdown_parser()
sections, tables = markdown_parser(name, blob, separate_tables=False)
sections, tables, section_images = markdown_parser(
name,
blob,
separate_tables=False,
delimiter=conf.get("delimiter"),
return_section_images=True,
)
if conf.get("output_format") == "json":
json_results = []
for section_text, _ in sections:
for idx, (section_text, _) in enumerate(sections):
json_result = {
"text": section_text,
}
images = markdown_parser.get_pictures(section_text) if section_text else None
images = []
if section_images and len(section_images) > idx and section_images[idx] is not None:
images.append(section_images[idx])
if images:
# If multiple images found, combine them using concat_img
combined_image = reduce(concat_img, images) if len(images) > 1 else images[0]