mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
[CUDA] Fix cuda provider fallback inconsistency (#21425)
* Fix fallback setting (previously the CUDA provider could be listed as its own fallback; CUDA now falls back to CPU, and only TensorRT falls back to CUDA).
* Fix cuda provider fallback inconsistent with/without CUDA_PATH
environment variable.
* Add cuda and cudnn major version requirement in error message.
Example result in Windows:
```
>>> import onnxruntime
>>> ort_session = onnxruntime.InferenceSession("model.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
2024-07-19 17:43:44.2260019 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 onnxruntime::TryGetProviderInfo_CUDA] D:\onnxruntime\onnxruntime\core\session\provider_bridge_ort.cc:1636 onnxruntime::ProviderLibrary::Get [ONNXRuntimeError] : 1 : FAIL : LoadLibrary failed with error 126 "" when trying to load "C:\Users\.conda\envs\py310\lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
2024-07-19 17:43:44.2312351 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:970 onnxruntime::python::CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*, and the latest MSVC runtime. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.
>>> ort_session
<onnxruntime.capi.onnxruntime_inference_collection.InferenceSession object at 0x0000016BB2DF7D60>
>>> ort_session.get_providers()
['CPUExecutionProvider']
```
Example result in Linux:
```
>>> import onnxruntime
>>> ort_session = onnxruntime.InferenceSession("resnet50-v2-7.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
2024-07-20 20:33:26.486974543 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 TryGetProviderInfo_CUDA] /work/onnxruntime/onnxruntime/core/session/provider_bridge_ort.cc:1636 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory
2024-07-20 20:33:26.487034646 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:961 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.
>>> ort_session.get_providers()
['CPUExecutionProvider']
```
### Motivation and Context
https://github.com/microsoft/onnxruntime/issues/21424
This commit is contained in:
parent
7af39c6955
commit
2b7e2a5bd0
3 changed files with 31 additions and 17 deletions
|
|
@ -97,8 +97,12 @@ endif()
|
|||
|
||||
onnxruntime_add_include_to_target(onnxruntime_pybind11_state Python::Module Python::NumPy)
|
||||
target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${pybind11_INCLUDE_DIRS})
|
||||
if(onnxruntime_USE_CUDA AND onnxruntime_CUDNN_HOME)
|
||||
target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
|
||||
if(onnxruntime_USE_CUDA)
|
||||
target_include_directories(onnxruntime_pybind11_state PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
# cudnn_home is optional for Windows when cuda and cudnn are installed in the same directory.
|
||||
if(onnxruntime_CUDNN_HOME)
|
||||
target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
|
||||
endif()
|
||||
endif()
|
||||
if(onnxruntime_USE_CANN)
|
||||
target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CANN_HOME}/include)
|
||||
|
|
|
|||
|
|
@ -438,10 +438,18 @@ class InferenceSession(Session):
|
|||
|
||||
# Tensorrt can fall back to CUDA if it's explicitly assigned. All others fall back to CPU.
|
||||
if "TensorrtExecutionProvider" in available_providers:
|
||||
if providers and any(
|
||||
provider == "CUDAExecutionProvider"
|
||||
or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
|
||||
for provider in providers
|
||||
if (
|
||||
providers
|
||||
and any(
|
||||
provider == "CUDAExecutionProvider"
|
||||
or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
|
||||
for provider in providers
|
||||
)
|
||||
and any(
|
||||
provider == "TensorrtExecutionProvider"
|
||||
or (isinstance(provider, tuple) and provider[0] == "TensorrtExecutionProvider")
|
||||
for provider in providers
|
||||
)
|
||||
):
|
||||
self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -35,6 +35,11 @@
|
|||
#include "contrib_ops/cpu/aten_ops/aten_op_executor.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda.h> // for CUDA_VERSION
|
||||
#include <cudnn.h> // for CUDNN_MAJOR
|
||||
#endif
|
||||
|
||||
#include <pybind11/functional.h>
|
||||
|
||||
// Explicitly provide a definition for the static const var 'GPU' in the OrtDevice struct,
|
||||
|
|
@ -951,21 +956,18 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
|
|||
// external CUDA allocator.
|
||||
external_allocator_info = info.external_allocator_info;
|
||||
return cuda_provider_info->CreateExecutionProviderFactory(info)->CreateProvider();
|
||||
} else {
|
||||
if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
|
||||
ORT_THROW(
|
||||
"CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and "
|
||||
"cuDNN as mentioned in the GPU requirements page "
|
||||
" (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), "
|
||||
" make sure they're in the PATH, and that your GPU is supported.");
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGS_DEFAULT(WARNING) << "Failed to create "
|
||||
<< type
|
||||
<< ". Please reference "
|
||||
<< "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements"
|
||||
<< "to ensure all dependencies are met.";
|
||||
<< ". Require cuDNN " << CUDNN_MAJOR << ".* and "
|
||||
<< "CUDA " << (CUDA_VERSION / 1000) << ".*"
|
||||
#if defined(_MSC_VER)
|
||||
<< ", and the latest MSVC runtime"
|
||||
#endif
|
||||
<< ". Please install all dependencies as mentioned in the GPU requirements page"
|
||||
" (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), "
|
||||
"make sure they're in the PATH, and that your GPU is supported.";
|
||||
#endif
|
||||
} else if (type == kRocmExecutionProvider) {
|
||||
#ifdef USE_ROCM
|
||||
|
|
|
|||
Loading…
Reference in a new issue