From 2b7e2a5bd07a882a1a1f16e81025a74745ef0394 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Tue, 23 Jul 2024 11:58:04 -0700 Subject: [PATCH] [CUDA] Fix cuda provider fallback inconsistency (#21425) * Fix fallback setting (cuda still falls back to cuda). * Fix cuda provider fallback inconsistent with/without CUDA_PATH environment variable. * Add cuda and cudnn major version requirement in error message. Example result in Windows: ``` >>> import onnxruntime >>> ort_session = onnxruntime.InferenceSession("model.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 2024-07-19 17:43:44.2260019 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 onnxruntime::TryGetProviderInfo_CUDA] D:\onnxruntime\onnxruntime\core\session\provider_bridge_ort.cc:1636 onnxruntime::ProviderLibrary::Get [ONNXRuntimeError] : 1 : FAIL : LoadLibrary failed with error 126 "" when trying to load "C:\Users\.conda\envs\py310\lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll" 2024-07-19 17:43:44.2312351 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:970 onnxruntime::python::CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*, and the latest MSVC runtime. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported. 
>>> ort_session >>> ort_session.get_providers() ['CPUExecutionProvider'] ``` Example result in Linux: ``` >>> import onnxruntime >>> ort_session = onnxruntime.InferenceSession("resnet50-v2-7.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 2024-07-20 20:33:26.486974543 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 TryGetProviderInfo_CUDA] /work/onnxruntime/onnxruntime/core/session/provider_bridge_ort.cc:1636 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory 2024-07-20 20:33:26.487034646 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:961 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported. 
>>> ort_session.get_providers() ['CPUExecutionProvider'] ``` ### Motivation and Context https://github.com/microsoft/onnxruntime/issues/21424 --- cmake/onnxruntime_python.cmake | 8 +++++-- .../onnxruntime_inference_collection.py | 16 +++++++++---- .../python/onnxruntime_pybind_state.cc | 24 ++++++++++--------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 07c65e7986..270139ceaf 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -97,8 +97,12 @@ endif() onnxruntime_add_include_to_target(onnxruntime_pybind11_state Python::Module Python::NumPy) target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${pybind11_INCLUDE_DIRS}) -if(onnxruntime_USE_CUDA AND onnxruntime_CUDNN_HOME) - target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include) +if(onnxruntime_USE_CUDA) + target_include_directories(onnxruntime_pybind11_state PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + # cudnn_home is optional for Windows when cuda and cudnn are installed in the same directory. + if(onnxruntime_CUDNN_HOME) + target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include) + endif() endif() if(onnxruntime_USE_CANN) target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CANN_HOME}/include) diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index ecae280e92..c3cfe2c97a 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -438,10 +438,18 @@ class InferenceSession(Session): # Tensorrt can fall back to CUDA if it's explicitly assigned. All others fall back to CPU. 
if "TensorrtExecutionProvider" in available_providers: - if providers and any( - provider == "CUDAExecutionProvider" - or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider") - for provider in providers + if ( + providers + and any( + provider == "CUDAExecutionProvider" + or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider") + for provider in providers + ) + and any( + provider == "TensorrtExecutionProvider" + or (isinstance(provider, tuple) and provider[0] == "TensorrtExecutionProvider") + for provider in providers + ) ): self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] else: diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index d7155b2b68..6b5daf8cb8 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -35,6 +35,11 @@ #include "contrib_ops/cpu/aten_ops/aten_op_executor.h" #endif +#ifdef USE_CUDA +#include <cuda.h> // for CUDA_VERSION +#include <cudnn.h> // for CUDNN_MAJOR +#endif + #include // Explicitly provide a definition for the static const var 'GPU' in the OrtDevice struct, @@ -951,21 +956,18 @@ std::unique_ptr CreateExecutionProviderInstance( // external CUDA allocator. external_allocator_info = info.external_allocator_info; return cuda_provider_info->CreateExecutionProviderFactory(info)->CreateProvider(); - } else { - if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) { - ORT_THROW( - "CUDA_PATH is set but CUDA wasnt able to be loaded. Please install the correct version of CUDA and" - "cuDNN as mentioned in the GPU requirements page " - " (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), " - " make sure they're in the PATH, and that your GPU is supported."); - } } } LOGS_DEFAULT(WARNING) << "Failed to create " << type - << ". 
Please reference " - << "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements" - << "to ensure all dependencies are met."; + << ". Require cuDNN " << CUDNN_MAJOR << ".* and " + << "CUDA " << (CUDA_VERSION / 1000) << ".*" +#if defined(_MSC_VER) + << ", and the latest MSVC runtime" +#endif + << ". Please install all dependencies as mentioned in the GPU requirements page" + " (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), " + "make sure they're in the PATH, and that your GPU is supported."; #endif } else if (type == kRocmExecutionProvider) { #ifdef USE_ROCM