diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py index d91de2ab8..9b6e8fdf6 100644 --- a/deepdoc/vision/ocr.py +++ b/deepdoc/vision/ocr.py @@ -84,7 +84,8 @@ def load_model(model_dir, nm, device_id: int | None = None): def cuda_is_available(): try: import torch - if torch.cuda.is_available() and torch.cuda.device_count() > device_id: + target_id = 0 if device_id is None else device_id + if torch.cuda.is_available() and torch.cuda.device_count() > target_id: return True except Exception: return False @@ -100,10 +101,13 @@ def load_model(model_dir, nm, device_id: int | None = None): # Shrink GPU memory after execution run_options = ort.RunOptions() if cuda_is_available(): + gpu_mem_limit_mb = int(os.environ.get("OCR_GPU_MEM_LIMIT_MB", "2048")) + arena_strategy = os.environ.get("OCR_ARENA_EXTEND_STRATEGY", "kNextPowerOfTwo") + provider_device_id = 0 if device_id is None else device_id cuda_provider_options = { - "device_id": device_id, # Use specific GPU - "gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory - "arena_extend_strategy": "kNextPowerOfTwo", # gpu memory allocation strategy + "device_id": provider_device_id, # Use specific GPU + "gpu_mem_limit": max(gpu_mem_limit_mb, 0) * 1024 * 1024, + "arena_extend_strategy": arena_strategy, # gpu memory allocation strategy } sess = ort.InferenceSession( model_file_path, @@ -111,8 +115,8 @@ def load_model(model_dir, nm, device_id: int | None = None): providers=['CUDAExecutionProvider'], provider_options=[cuda_provider_options] ) - run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:" + str(device_id)) - logging.info(f"load_model {model_file_path} uses GPU") + run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:" + str(provider_device_id)) + logging.info(f"load_model {model_file_path} uses GPU (device {provider_device_id}, gpu_mem_limit={cuda_provider_options['gpu_mem_limit']}, arena_strategy={arena_strategy})") else: sess = ort.InferenceSession( model_file_path,