diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py
index 8736c9e275..fffbd51f0f 100644
--- a/onnxruntime/python/backend/backend.py
+++ b/onnxruntime/python/backend/backend.py
@@ -107,7 +107,11 @@ class OnnxRuntimeBackend(Backend):
         for k, v in kwargs.items():
             if hasattr(options, k):
                 setattr(options, k, v)
-        inf = InferenceSession(model, sess_options=options, providers=get_available_providers())
+
+        excluded_providers = os.getenv('ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS', default="").split(',')
+        providers = [x for x in get_available_providers() if (x not in excluded_providers)]
+
+        inf = InferenceSession(model, sess_options=options, providers=providers)
         # backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
         # which may hide test failures.
         inf.disable_fallback()
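
Reviewer note: with this change the backend module reads ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS at session-creation time, so callers can steer provider selection without any API change. A minimal usage sketch (the model path and exclusion list are illustrative, not part of this patch):

    import os
    # Comma-separated list; names must match ONNX Runtime provider identifiers.
    os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"

    import onnx
    import onnxruntime.backend as backend

    model = onnx.load("model.onnx")  # hypothetical model file
    rep = backend.prepare(model, device="GPU")  # the excluded EP is skipped
    # outputs = rep.run(inputs)
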
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 65e86288a5..ea198fb45d 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -352,151 +352,159 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
                          ->CreateProvider();
   } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-    std::string calibration_table, cache_path, lib_path;
-    auto it = provider_options_map.find(type);
-    if (it != provider_options_map.end()) {
-      OrtTensorRTProviderOptions params{
-          0,
-          0,
-          nullptr,
-          1000,
-          1,
-          1 << 30,
-          0,
-          0,
-          nullptr,
-          0,
-          0,
-          0,
-          0,
-          0,
-          nullptr,
-          0,
-          nullptr,
-          0};
-      for (auto option : it->second) {
-        if (option.first == "device_id") {
-          if (!option.second.empty()) {
-            params.device_id = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
-          }
-        } else if (option.first == "trt_max_partition_iterations") {
-          if (!option.second.empty()) {
-            params.trt_max_partition_iterations = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
-          }
-        } else if (option.first == "trt_min_subgraph_size") {
-          if (!option.second.empty()) {
-            params.trt_min_subgraph_size = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
-          }
-        } else if (option.first == "trt_max_workspace_size") {
-          if (!option.second.empty()) {
-            params.trt_max_workspace_size = std::stoull(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
-          }
-        } else if (option.first == "trt_fp16_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_fp16_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_fp16_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_int8_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_int8_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_int8_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_int8_calibration_table_name") {
-          if (!option.second.empty()) {
-            calibration_table = option.second;
-            params.trt_int8_calibration_table_name = calibration_table.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
-          }
-        } else if (option.first == "trt_int8_use_native_calibration_table") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_int8_use_native_calibration_table = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_int8_use_native_calibration_table = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_dla_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_dla_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_dla_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_dla_core") {
-          if (!option.second.empty()) {
-            params.trt_dla_core = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
-          }
-        } else if (option.first == "trt_dump_subgraphs") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_dump_subgraphs = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_dump_subgraphs = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_cache_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_engine_cache_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_engine_cache_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_cache_path") {
-          if (!option.second.empty()) {
-            cache_path = option.second;
-            params.trt_engine_cache_path = cache_path.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
-          }
-        } else if (option.first == "trt_engine_decryption_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_engine_decryption_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_engine_decryption_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_decryption_lib_path") {
-          if (!option.second.empty()) {
-            lib_path = option.second;
-            params.trt_engine_decryption_lib_path = lib_path.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
-          }
-        } else if (option.first == "trt_force_sequential_engine_build") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_force_sequential_engine_build = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_force_sequential_engine_build = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else {
-          ORT_THROW("Invalid TensorRT EP option: ", option.first);
-        }
-      }
-      return onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)->CreateProvider();
-    } else {
-      return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider();
-    }
'decryption_lib'.\n"); - } - } else if (option.first == "trt_force_sequential_engine_build") { - if (option.second == "True" || option.second == "true") { - params.trt_force_sequential_engine_build = true; - } else if (option.second == "False" || option.second == "false") { - params.trt_force_sequential_engine_build = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case + // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies. + if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) { + std::string calibration_table, cache_path, lib_path; + auto it = provider_options_map.find(type); + if (it != provider_options_map.end()) { + OrtTensorRTProviderOptions params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0}; + for (auto option : it->second) { + if (option.first == "device_id") { + if (!option.second.empty()) { + params.device_id = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n"); + } + } else if (option.first == "trt_max_partition_iterations") { + if (!option.second.empty()) { + params.trt_max_partition_iterations = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n"); + } + } else if (option.first == "trt_min_subgraph_size") { + if (!option.second.empty()) { + params.trt_min_subgraph_size = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n"); + } + } else if (option.first == "trt_max_workspace_size") { + if (!option.second.empty()) { + params.trt_max_workspace_size = std::stoull(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n"); + } + } else if (option.first == "trt_fp16_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_fp16_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_fp16_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_calibration_table_name") { + if (!option.second.empty()) { + calibration_table = option.second; + params.trt_int8_calibration_table_name = calibration_table.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 
'cal_table'.\n"); + } + } else if (option.first == "trt_int8_use_native_calibration_table") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_use_native_calibration_table = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_use_native_calibration_table = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_dla_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_dla_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_dla_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_dla_core") { + if (!option.second.empty()) { + params.trt_dla_core = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n"); + } + } else if (option.first == "trt_dump_subgraphs") { + if (option.second == "True" || option.second == "true") { + params.trt_dump_subgraphs = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_dump_subgraphs = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_cache_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_engine_cache_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_engine_cache_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_cache_path") { + if (!option.second.empty()) { + cache_path = option.second; + params.trt_engine_cache_path = cache_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n"); + } + } else if (option.first == "trt_engine_decryption_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_engine_decryption_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_engine_decryption_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_decryption_lib_path") { + if (!option.second.empty()) { + lib_path = option.second; + params.trt_engine_decryption_lib_path = lib_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 
'decryption_lib'.\n"); + } + } else if (option.first == "trt_force_sequential_engine_build") { + if (option.second == "True" || option.second == "true") { + params.trt_force_sequential_engine_build = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_force_sequential_engine_build = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else { + ORT_THROW("Invalid TensorRT EP option: ", option.first); + } } + return onnxruntime::CreateExecutionProviderFactory_Tensorrt(¶ms)->CreateProvider(); } else { - ORT_THROW("Invalid TensorRT EP option: ", option.first); + return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider(); } - } - return onnxruntime::CreateExecutionProviderFactory_Tensorrt(¶ms)->CreateProvider(); } else { - return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider(); + if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) { + ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements) as well as TensorRT as mentioned in the TensorRT requirements page (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported."); + } } #endif } else if (type == kMIGraphXExecutionProvider) { diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index 28914dcf6d..9520b885b9 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -122,6 +122,9 @@ def create_backend_test(testname=None): backend_test.exclude('(' + '|'.join(filters) + ')') print('excluded tests:', filters) + # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" + # import all test cases at global scope to make # them visible to python.unittest. globals().update(backend_test.enable_report().test_cases) diff --git a/setup.py b/setup.py index 2d2e4047dd..b28f48fcbd 100644 --- a/setup.py +++ b/setup.py @@ -52,9 +52,7 @@ cuda_version = None rocm_version = None is_rocm = False # The following arguments are mutually exclusive -if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'): - package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly' -elif wheel_name_suffix == 'gpu': +if wheel_name_suffix == 'gpu': # TODO: how to support multiple CUDA versions? 
diff --git a/setup.py b/setup.py
index 2d2e4047dd..b28f48fcbd 100644
--- a/setup.py
+++ b/setup.py
@@ -52,9 +52,7 @@ cuda_version = None
 rocm_version = None
 is_rocm = False
 # The following arguments are mutually exclusive
-if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'):
-    package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly'
-elif wheel_name_suffix == 'gpu':
+if wheel_name_suffix == 'gpu':
     # TODO: how to support multiple CUDA versions?
     cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=')
 elif parse_arg_remove_boolean(sys.argv, '--use_rocm'):
@@ -135,6 +133,17 @@ try:
                     f.write('    import os\n')
                     f.write('    os.environ["ORT_CUDA_UNAVAILABLE"] = "1"\n')
 
+        def _rewrite_ld_preload_tensorrt(self, to_preload):
+            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
+                if len(to_preload) > 0:
+                    f.write('from ctypes import CDLL, RTLD_GLOBAL\n')
+                    f.write('try:\n')
+                    for library in to_preload:
+                        f.write('    _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library))
+                    f.write('except OSError:\n')
+                    f.write('    import os\n')
+                    f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
+
         def run(self):
             if is_manylinux:
                 source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
@@ -147,6 +156,8 @@ try:
                                 'libhsa-runtime64.so', 'libhsakmt.so']
                 to_preload = []
                 to_preload_cuda = []
+                to_preload_tensorrt = []
+                cuda_dependencies = []
                 args = ['patchelf', '--debug']
                 for line in result.stdout.split('\n'):
                     for dependency in dependencies:
@@ -162,7 +173,7 @@ try:
                     result = subprocess.run(['patchelf', '--print-needed', dest],
                                             check=True, stdout=subprocess.PIPE, universal_newlines=True)
                     cuda_dependencies = ['libcublas.so', 'libcublasLt.so', 'libcudnn.so', 'libcudart.so',
-                                         'libcurand.so', 'libcufft.so', 'libnvToolsExt.so']
+                                         'libcurand.so', 'libcufft.so', 'libnvToolsExt.so', 'libcupti.so']
                     rocm_dependencies = ['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so',
                                          'libhsa-runtime64.so', 'libhsakmt.so']
                     args = ['patchelf', '--debug']
@@ -176,8 +187,25 @@ try:
                     if len(args) > 3:
                         subprocess.run(args, check=True, stdout=subprocess.PIPE)
 
+                dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
+                if path.isfile(dest):
+                    result = subprocess.run(['patchelf', '--print-needed', dest],
+                                            check=True, stdout=subprocess.PIPE, universal_newlines=True)
+                    tensorrt_dependencies = ['libnvinfer.so', 'libnvinfer_plugin.so', 'libnvonnxparser.so']
+                    args = ['patchelf', '--debug']
+                    for line in result.stdout.split('\n'):
+                        for dependency in (cuda_dependencies + tensorrt_dependencies):
+                            if dependency in line:
+                                if dependency not in (to_preload + to_preload_cuda):
+                                    to_preload_tensorrt.append(line)
+                                args.extend(['--remove-needed', line])
+                    args.append(dest)
+                    if len(args) > 3:
+                        subprocess.run(args, check=True, stdout=subprocess.PIPE)
+
                 self._rewrite_ld_preload(to_preload)
                 self._rewrite_ld_preload_cuda(to_preload_cuda)
+                self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
             _bdist_wheel.run(self)
             if is_manylinux and not disable_auditwheel_repair:
                 file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
@@ -202,6 +230,7 @@ if platform.system() == 'Linux':
             'mimalloc.so']
     dl_libs = ['libonnxruntime_providers_shared.so']
    
     dl_libs.append(providers_cuda_or_rocm)
+    dl_libs.append('libonnxruntime_providers_tensorrt.so')
     # DNNL, TensorRT & OpenVINO EPs are built as shared libs
     libs.extend(['libonnxruntime_providers_shared.so'])
     libs.extend(['libonnxruntime_providers_dnnl.so'])
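
For reference, the block appended to _ld_preload.py is generated from the f.write template in _rewrite_ld_preload_tensorrt above. Assuming to_preload_tensorrt ended up as, say, ['libnvinfer.so.8'] (a hypothetical entry), the emitted code would read:

    from ctypes import CDLL, RTLD_GLOBAL
    try:
        # Preload the TensorRT library globally so the provider can resolve its symbols.
        _libnvinfer = CDLL("libnvinfer.so.8", mode=RTLD_GLOBAL)
    except OSError:
        import os
        os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"

This is the counterpart of the ORT_TENSORRT_UNAVAILABLE check added to onnxruntime_pybind_state.cc: if the auditwheel-stripped TensorRT libraries cannot be loaded, the EP is skipped instead of crashing the import.
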
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 3e62c460b8..6797638b08 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1715,9 +1715,7 @@ def build_python_wheel(
         args.append("--disable_auditwheel_repair")
 
     # The following arguments are mutually exclusive
-    if use_tensorrt:
-        args.append('--use_tensorrt')
-    elif use_cuda:
+    if use_cuda:
         # The following line assumes no other EP is enabled
         args.append('--wheel_name_suffix=gpu')
         if cuda_version:
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
index 2c8680ea26..8f3fd62dfd 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
@@ -210,10 +210,10 @@ stages:
     - template: get-docker-image-steps.yml
       parameters:
-        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_4_tensorrt8_0
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvidia/cuda:11.4.0-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
-        Repository: onnxruntimecuda11build
+        DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
+        Repository: onnxruntimecuda114xtrt80build
 
     - task: CmdLine@2
       displayName: 'Build Python Wheel'
@@ -228,14 +228,14 @@ stages:
               --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
               -e NIGHTLY_BUILD \
               -e BUILD_BUILDNUMBER \
-              onnxruntimecuda11build \
+              onnxruntimecuda114xtrt80build \
               $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                 --build_dir /build --cmake_generator Ninja \
                 --config Release --update --build \
                 --skip_submodule_sync \
                 --parallel \
                 --build_wheel \
-                --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+                --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
                 ${{ parameters.build_py_parameters }} \
                 --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
           workingDirectory: $(Build.SourcesDirectory)
@@ -266,7 +266,7 @@ stages:
                 --skip_submodule_sync \
                 --parallel \
                 --build_wheel \
-                --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+                --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
                 ${{ parameters.build_py_parameters }} --ctest_path '' \
                 --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
@@ -810,25 +810,25 @@ stages:
       buildArch: x64
       strategy:
         matrix:
-          Python36_cuda:
+          Python36_GPU:
             PythonVersion: '3.6'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python37_cuda:
+          Python37_GPU:
             PythonVersion: '3.7'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python38_cuda:
+          Python38_GPU:
             PythonVersion: '3.8'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python39_cuda:
+          Python39_GPU:
             PythonVersion: '3.9'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
           Python36_dml:
             PythonVersion: '3.6'
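
As a quick post-install sanity check for wheels produced by these pipelines, one can confirm that the TensorRT EP registers alongside CUDA (a sketch; the exact list depends on the local CUDA/cuDNN/TensorRT installation):

    import onnxruntime as ort

    # On a correctly configured machine the unified GPU wheel should report, e.g.:
    # ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
    print(ort.get_available_providers())
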