Fix: wrong param in manual chunk (#10710)

### What problem does this PR solve?

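Change:
Fix the wrong parameter passed in the manual chunker: the DOCX path of `chunk` now calls `vision_figure_parser_docx_wrapper` with `sections=ti_list` (the list returned by the DOCX parser) instead of `sections=sections`.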

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
buua436
2025-10-21 20:10:54 +08:00
committed by GitHub
parent 8d333f3590
commit 41fade3fe6
2 changed files with 8 additions and 8 deletions

View File

@ -41,13 +41,13 @@ def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
except Exception: except Exception:
vision_model = None vision_model = None
if vision_model: if vision_model:
figures_data = vision_figure_parser_figure_data_wrapper(sections) figures_data = vision_figure_parser_figure_data_wrapper(sections)
try: try:
docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs) docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
boosted_figures = docx_vision_parser(callback=callback) boosted_figures = docx_vision_parser(callback=callback)
tbls.extend(boosted_figures) tbls.extend(boosted_figures)
except Exception as e: except Exception as e:
callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.") callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
return tbls return tbls
def vision_figure_parser_pdf_wrapper(tbls,callback=None,**kwargs): def vision_figure_parser_pdf_wrapper(tbls,callback=None,**kwargs):

View File

@ -262,7 +262,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
docx_parser = Docx() docx_parser = Docx()
ti_list, tbls = docx_parser(filename, binary, ti_list, tbls = docx_parser(filename, binary,
from_page=0, to_page=10000, callback=callback) from_page=0, to_page=10000, callback=callback)
tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs) tbls=vision_figure_parser_docx_wrapper(sections=ti_list,tbls=tbls,callback=callback,**kwargs)
res = tokenize_table(tbls, doc, eng) res = tokenize_table(tbls, doc, eng)
for text, image in ti_list: for text, image in ti_list:
d = copy.deepcopy(doc) d = copy.deepcopy(doc)