perf: optimize figure parser (#7392)

### What problem does this PR solve?

When parsing documents that contain images, the current code calls the VL model from a single thread, which makes parsing extremely slow (e.g., parsing a Word document with dozens of images takes over 20 minutes). Calling the VL model from multiple threads instead brings the parsing speed to an acceptable level.

### Type of change

- [x] Performance Improvement

---------

Co-authored-by: liuzhenghua-jk <liuzhenghua-jk@360shuke.com>
2026-01-30 07:06:39 +08:00 · 2025-05-06 14:39:45 +08:00
parent d6cc6453d1
commit 2f768b96e8
2 changed files with 43 additions and 30 deletions
--- a/deepdoc/parser/figure_parser.py
+++ b/deepdoc/parser/figure_parser.py
@ -13,7 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-
+from concurrent.futures import ThreadPoolExecutor, as_completed

 from PIL import Image

@ -28,6 +28,7 @@ def vision_figure_parser_figure_data_wraper(figures_data_without_positions):
    ) for figure_data in figures_data_without_positions if isinstance(figure_data[1], Image.Image)]


+shared_executor = ThreadPoolExecutor(max_workers=10)
 class VisionFigureParser:
    def __init__(self, vision_model, figures_data, *args, **kwargs):
        self.vision_model = vision_model
@ -73,16 +74,21 @@ class VisionFigureParser:
    def __call__(self, **kwargs):
        callback = kwargs.get("callback", lambda prog, msg: None)

-        for idx, img_binary in enumerate(self.figures or []):
-            figure_num = idx  # 0-based
-
-            txt = picture_vision_llm_chunk(
-                binary=img_binary,
+        def process(figure_idx, figure_binary):
+            description_text = picture_vision_llm_chunk(
+                binary=figure_binary,
                vision_model=self.vision_model,
                prompt=vision_llm_figure_describe_prompt(),
                callback=callback,
            )
+            return figure_idx, description_text

+        futures = []
+        for idx, img_binary in enumerate(self.figures or []):
+            futures.append(shared_executor.submit(process, idx, img_binary))
+
+        for future in as_completed(futures):
+            figure_num, txt = future.result()
            if txt:
                self.descriptions[figure_num] = txt + "\n".join(self.descriptions[figure_num])