deepdoc use GPU if possible (#4618)

### What problem does this PR solve?

Make deepdoc use the GPU when one is available.

### Type of change

- [x] Refactoring
2026-01-31 07:36:46 +08:00 · 2025-01-24 09:48:02 +08:00
parent e14d6ae441
commit 4230402fbb
2 changed files with 43 additions and 10 deletions
--- a/deepdoc/vision/recognizer.py
+++ b/deepdoc/vision/recognizer.py
@@ -60,12 +60,29 @@ class Recognizer(object):
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
-        if False and ort.get_device() == "GPU":
+        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+        # Shrink GPU memory after execution
+        self.run_options = ort.RunOptions()
+
+        if ort.get_device() == "GPU":
            options = ort.SessionOptions()
            options.enable_cpu_mem_arena = False
-            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])
+            cuda_provider_options = {
+                "device_id": 0, # Use specific GPU
+                "gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory
+                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+            }
+            self.ort_sess = ort.InferenceSession(
+                model_file_path, options=options,
+                providers=['CUDAExecutionProvider'],
+                provider_options=[cuda_provider_options]
+            )
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+            logging.info(f"Recognizer {task_name} uses GPU")
        else:
            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+            logging.info(f"Recognizer {task_name} uses CPU")
        self.input_names = [node.name for node in self.ort_sess.get_inputs()]
        self.output_names = [node.name for node in self.ort_sess.get_outputs()]
        self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
@@ -454,7 +471,7 @@ class Recognizer(object):
            inputs = self.preprocess(batch_image_list)
            logging.debug("preprocess")
            for ins in inputs:
-                bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
+                bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names}, self.run_options)[0], ins, thr)
                res.append(bb)

        #seeit.save_results(image_list, res, self.label_list, threshold=thr)