diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py
index d91de2ab8..9b6e8fdf6 100644
--- a/deepdoc/vision/ocr.py
+++ b/deepdoc/vision/ocr.py
@@ -84,7 +84,8 @@ def load_model(model_dir, nm, device_id: int | None = None):
     def cuda_is_available():
         try:
             import torch
-            if torch.cuda.is_available() and torch.cuda.device_count() > device_id:
+            target_id = 0 if device_id is None else device_id  # device_id defaults to None; treat that as GPU 0 so the comparison below never sees None
+            if torch.cuda.is_available() and torch.cuda.device_count() > target_id:  # the requested index must be below the CUDA device count
                 return True
         except Exception:
             return False
@@ -100,10 +101,13 @@ def load_model(model_dir, nm, device_id: int | None = None):
     # Shrink GPU memory after execution
     run_options = ort.RunOptions()
     if cuda_is_available():
+        gpu_mem_limit_mb = int(os.environ.get("OCR_GPU_MEM_LIMIT_MB", "2048"))  # GPU memory limit in MB, overridable via env (default 2048); NOTE(review): int() raises ValueError on a non-integer value
+        arena_strategy = os.environ.get("OCR_ARENA_EXTEND_STRATEGY", "kNextPowerOfTwo")  # passed to the provider unvalidated; NOTE(review): confirm accepted values against the ONNX Runtime docs
+        provider_device_id = 0 if device_id is None else device_id  # None falls back to GPU 0, same rule as in cuda_is_available()
         cuda_provider_options = {
-            "device_id": device_id, # Use specific GPU
-            "gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory
-            "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+            "device_id": provider_device_id, # Use specific GPU (never None)
+            "gpu_mem_limit": max(gpu_mem_limit_mb, 0) * 1024 * 1024,  # Limit gpu memory: MB -> bytes, negatives clamped to 0; NOTE(review): confirm how the provider treats a limit of 0
+            "arena_extend_strategy": arena_strategy,  # gpu memory allocation strategy
         }
         sess = ort.InferenceSession(
             model_file_path,
@@ -111,8 +115,8 @@ def load_model(model_dir, nm, device_id: int | None = None):
             providers=['CUDAExecutionProvider'],
             provider_options=[cuda_provider_options]
             )
-        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:" + str(device_id))
-        logging.info(f"load_model {model_file_path} uses GPU")
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:" + str(provider_device_id))  # use the same normalized device index as the provider options, so the entry is never "gpu:None"
+        logging.info(f"load_model {model_file_path} uses GPU (device {provider_device_id}, gpu_mem_limit={cuda_provider_options['gpu_mem_limit']}, arena_strategy={arena_strategy})")  # gpu_mem_limit is logged as the computed value, not the MB setting
     else:
         sess = ort.InferenceSession(
             model_file_path,