Integrate TensorRT into GPU Python package (#9785)

* add use_tensorrt build option

* Add use_tensorrt when running tests

* add use_tensorrt for Windows

* make TRT EP skip the backend test

* make TRT EP skip the backend test

* Fix bug

* Add/Modify description

* modify for debugging

* switch pool to test

* modify for debugging

* modify for debugging

* add verbosity

* refine the code

* refine the code

* refine the code

* fix flake8 warning

* refine the code

* add pre_load check for TRT and add the cupti lib to the CUDA dependencies

* modify script to make the TRT build path the same as the CUDA one

* show an error message when the user wants to run TensorRT but it is not installed in the environment

* fix bug

* fix bug

* add trt lib for manylinux

* include cuda_dependencies for trt

* rewrite the condition to throw an exception

* make code more compact

Chi Lo authored on 2021-11-18 13:26:51 -08:00, committed by GitHub
parent 76715ad525
commit 7242627fec
6 changed files with 209 additions and 167 deletions

@@ -107,7 +107,11 @@ class OnnxRuntimeBackend(Backend):
for k, v in kwargs.items():
if hasattr(options, k):
setattr(options, k, v)
inf = InferenceSession(model, sess_options=options, providers=get_available_providers())
excluded_providers = os.getenv('ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS', default="").split(',')
providers = [x for x in get_available_providers() if (x not in excluded_providers)]
inf = InferenceSession(model, sess_options=options, providers=providers)
# backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
# which may hide test failures.
inf.disable_fallback()
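
For context, a minimal sketch of how the new exclusion variable behaves from user code, assuming an onnxruntime-gpu build where the TensorRT, CUDA, and CPU providers are all registered; the model path is a placeholder:

```python
# Hedged sketch: exclude the TensorRT EP before preparing a backend session.
import os

import onnxruntime
import onnxruntime.backend as backend

# Comma-separated list, matching the split(',') in the code above.
os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"

print(onnxruntime.get_available_providers())
# e.g. ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']

# The InferenceSession created inside prepare() now receives only
# ['CUDAExecutionProvider', 'CPUExecutionProvider'].
rep = backend.prepare("model.onnx")  # hypothetical model path
```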

@@ -352,151 +352,159 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
->CreateProvider();
} else if (type == kTensorrtExecutionProvider) {
#ifdef USE_TENSORRT
std::string calibration_table, cache_path, lib_path;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
OrtTensorRTProviderOptions params{
0,
0,
nullptr,
1000,
1,
1 << 30,
0,
0,
nullptr,
0,
0,
0,
0,
0,
nullptr,
0,
nullptr,
0};
for (auto option : it->second) {
if (option.first == "device_id") {
if (!option.second.empty()) {
params.device_id = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
}
} else if (option.first == "trt_max_partition_iterations") {
if (!option.second.empty()) {
params.trt_max_partition_iterations = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
}
} else if (option.first == "trt_min_subgraph_size") {
if (!option.second.empty()) {
params.trt_min_subgraph_size = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
}
} else if (option.first == "trt_max_workspace_size") {
if (!option.second.empty()) {
params.trt_max_workspace_size = std::stoull(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
}
} else if (option.first == "trt_fp16_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_fp16_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_fp16_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_calibration_table_name") {
if (!option.second.empty()) {
calibration_table = option.second;
params.trt_int8_calibration_table_name = calibration_table.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
}
} else if (option.first == "trt_int8_use_native_calibration_table") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_use_native_calibration_table = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_use_native_calibration_table = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_dla_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_dla_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_dla_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_dla_core") {
if (!option.second.empty()) {
params.trt_dla_core = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
}
} else if (option.first == "trt_dump_subgraphs") {
if (option.second == "True" || option.second == "true") {
params.trt_dump_subgraphs = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_dump_subgraphs = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_cache_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_engine_cache_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_engine_cache_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_cache_path") {
if (!option.second.empty()) {
cache_path = option.second;
params.trt_engine_cache_path = cache_path.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
}
} else if (option.first == "trt_engine_decryption_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_engine_decryption_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_engine_decryption_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_decryption_lib_path") {
if (!option.second.empty()) {
lib_path = option.second;
params.trt_engine_decryption_lib_path = lib_path.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
}
} else if (option.first == "trt_force_sequential_engine_build") {
if (option.second == "True" || option.second == "true") {
params.trt_force_sequential_engine_build = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_force_sequential_engine_build = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
// If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case
// as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) {
std::string calibration_table, cache_path, lib_path;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
OrtTensorRTProviderOptions params{
0,
0,
nullptr,
1000,
1,
1 << 30,
0,
0,
nullptr,
0,
0,
0,
0,
0,
nullptr,
0,
nullptr,
0};
for (auto option : it->second) {
if (option.first == "device_id") {
if (!option.second.empty()) {
params.device_id = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
}
} else if (option.first == "trt_max_partition_iterations") {
if (!option.second.empty()) {
params.trt_max_partition_iterations = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
}
} else if (option.first == "trt_min_subgraph_size") {
if (!option.second.empty()) {
params.trt_min_subgraph_size = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
}
} else if (option.first == "trt_max_workspace_size") {
if (!option.second.empty()) {
params.trt_max_workspace_size = std::stoull(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
}
} else if (option.first == "trt_fp16_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_fp16_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_fp16_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_calibration_table_name") {
if (!option.second.empty()) {
calibration_table = option.second;
params.trt_int8_calibration_table_name = calibration_table.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
}
} else if (option.first == "trt_int8_use_native_calibration_table") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_use_native_calibration_table = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_use_native_calibration_table = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_dla_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_dla_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_dla_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_dla_core") {
if (!option.second.empty()) {
params.trt_dla_core = std::stoi(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
}
} else if (option.first == "trt_dump_subgraphs") {
if (option.second == "True" || option.second == "true") {
params.trt_dump_subgraphs = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_dump_subgraphs = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_cache_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_engine_cache_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_engine_cache_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_cache_path") {
if (!option.second.empty()) {
cache_path = option.second;
params.trt_engine_cache_path = cache_path.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
}
} else if (option.first == "trt_engine_decryption_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_engine_decryption_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_engine_decryption_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_engine_decryption_lib_path") {
if (!option.second.empty()) {
lib_path = option.second;
params.trt_engine_decryption_lib_path = lib_path.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
}
} else if (option.first == "trt_force_sequential_engine_build") {
if (option.second == "True" || option.second == "true") {
params.trt_force_sequential_engine_build = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_force_sequential_engine_build = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else {
ORT_THROW("Invalid TensorRT EP option: ", option.first);
}
}
return onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)->CreateProvider();
} else {
return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider();
}
} else {
if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements) as well as TensorRT as mentioned in the TensorRT requirements page (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.");
}
}
#endif
} else if (type == kMIGraphXExecutionProvider) {
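
For reference, a hedged sketch of how these option strings reach the parser above from Python. The keys mirror the option names handled in this hunk; the model path and the values themselves are illustrative only:

```python
# Hedged sketch: pass TensorRT EP options from Python as strings, which is
# the form the C++ code above parses ("model.onnx" is a placeholder).
import onnxruntime as ort

trt_options = {
    "device_id": "0",
    "trt_fp16_enable": "True",
    "trt_max_workspace_size": "1073741824",  # bytes
    "trt_engine_cache_enable": "True",
    "trt_engine_cache_path": "engine_cache",
}

sess = ort.InferenceSession(
    "model.onnx",
    providers=[
        ("TensorrtExecutionProvider", trt_options),
        "CUDAExecutionProvider",  # fallback for nodes TensorRT does not take
        "CPUExecutionProvider",
    ],
)
```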

@@ -122,6 +122,9 @@ def create_backend_test(testname=None):
backend_test.exclude('(' + '|'.join(filters) + ')')
print('excluded tests:', filters)
# exclude TRT EP temporarily and only test CUDA EP to retain previous behavior
os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"
# import all test cases at global scope to make
# them visible to python.unittest.
globals().update(backend_test.enable_report().test_cases)
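
Since backend.py splits the variable on commas, several EPs could be excluded at once; a hypothetical variant of the line added above:

```python
# Hypothetical variant: exclude more than one EP from the backend tests.
os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,OpenVINOExecutionProvider"
```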

@@ -52,9 +52,7 @@ cuda_version = None
rocm_version = None
is_rocm = False
# The following arguments are mutually exclusive
if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'):
package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly'
elif wheel_name_suffix == 'gpu':
if wheel_name_suffix == 'gpu':
# TODO: how to support multiple CUDA versions?
cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=')
elif parse_arg_remove_boolean(sys.argv, '--use_rocm'):
@@ -135,6 +133,17 @@ try:
f.write(' import os\n')
f.write(' os.environ["ORT_CUDA_UNAVAILABLE"] = "1"\n')
def _rewrite_ld_preload_tensorrt(self, to_preload):
with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
if len(to_preload) > 0:
f.write('from ctypes import CDLL, RTLD_GLOBAL\n')
f.write('try:\n')
for library in to_preload:
f.write(' _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library))
f.write('except OSError:\n')
f.write(' import os\n')
f.write(' os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
def run(self):
if is_manylinux:
source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
@@ -147,6 +156,8 @@ try:
'libhsa-runtime64.so', 'libhsakmt.so']
to_preload = []
to_preload_cuda = []
to_preload_tensorrt = []
cuda_dependencies = []
args = ['patchelf', '--debug']
for line in result.stdout.split('\n'):
for dependency in dependencies:
@@ -162,7 +173,7 @@ try:
result = subprocess.run(['patchelf', '--print-needed', dest],
check=True, stdout=subprocess.PIPE, universal_newlines=True)
cuda_dependencies = ['libcublas.so', 'libcublasLt.so', 'libcudnn.so', 'libcudart.so',
'libcurand.so', 'libcufft.so', 'libnvToolsExt.so']
'libcurand.so', 'libcufft.so', 'libnvToolsExt.so', 'libcupti.so']
rocm_dependencies = ['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so',
'libhsa-runtime64.so', 'libhsakmt.so']
args = ['patchelf', '--debug']
@@ -176,8 +187,25 @@ try:
if len(args) > 3:
subprocess.run(args, check=True, stdout=subprocess.PIPE)
dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
if path.isfile(dest):
result = subprocess.run(['patchelf', '--print-needed', dest],
check=True, stdout=subprocess.PIPE, universal_newlines=True)
tensorrt_dependencies = ['libnvinfer.so', 'libnvinfer_plugin.so', 'libnvonnxparser.so']
args = ['patchelf', '--debug']
for line in result.stdout.split('\n'):
for dependency in (cuda_dependencies + tensorrt_dependencies):
if dependency in line:
if dependency not in (to_preload + to_preload_cuda):
to_preload_tensorrt.append(line)
args.extend(['--remove-needed', line])
args.append(dest)
if len(args) > 3:
subprocess.run(args, check=True, stdout=subprocess.PIPE)
self._rewrite_ld_preload(to_preload)
self._rewrite_ld_preload_cuda(to_preload_cuda)
self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
_bdist_wheel.run(self)
if is_manylinux and not disable_auditwheel_repair:
file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
@@ -202,6 +230,7 @@ if platform.system() == 'Linux':
'mimalloc.so']
dl_libs = ['libonnxruntime_providers_shared.so']
dl_libs.append(providers_cuda_or_rocm)
dl_libs.append('libonnxruntime_providers_tensorrt.so')
# DNNL, TensorRT & OpenVINO EPs are built as shared libs
libs.extend(['libonnxruntime_providers_shared.so'])
libs.extend(['libonnxruntime_providers_dnnl.so'])
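
For illustration, the fragment that _rewrite_ld_preload_tensorrt appends to onnxruntime/capi/_ld_preload.py would look roughly like this, assuming to_preload_tensorrt contained a single entry such as libnvinfer.so.8 (the actual names depend on which libraries patchelf removed):

```python
# Roughly the generated _ld_preload.py fragment for one preloaded library.
from ctypes import CDLL, RTLD_GLOBAL
try:
    _libnvinfer = CDLL("libnvinfer.so.8", mode=RTLD_GLOBAL)
except OSError:
    import os
    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"
```

This is the flag the pybind code above checks: if the TensorRT libraries fail to load at import time, the EP is skipped instead of crashing the process.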

@@ -1715,9 +1715,7 @@ def build_python_wheel(
args.append("--disable_auditwheel_repair")
# The following arguments are mutually exclusive
if use_tensorrt:
args.append('--use_tensorrt')
elif use_cuda:
if use_cuda:
# The following line assumes no other EP is enabled
args.append('--wheel_name_suffix=gpu')
if cuda_version:
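
A quick, hedged way to confirm the effect of this packaging change after installing the gpu wheel built with --use_tensorrt (exact output varies by machine and driver setup):

```python
# Hedged check: the TensorRT EP should now be reported by the regular
# onnxruntime-gpu package rather than a separate onnxruntime-gpu-tensorrt one.
import onnxruntime as ort

print(ort.get_device())               # e.g. 'GPU'
print(ort.get_available_providers())  # expect 'TensorrtExecutionProvider' in the list
```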

@@ -210,10 +210,10 @@ stages:
- template: get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_4_tensorrt8_0
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvidia/cuda:11.4.0-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
Repository: onnxruntimecuda11build
DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
Repository: onnxruntimecuda114xtrt80build
- task: CmdLine@2
displayName: 'Build Python Wheel'
@@ -228,14 +228,14 @@ stages:
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
onnxruntimecuda11build \
onnxruntimecuda114xtrt80build \
$(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
--enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
${{ parameters.build_py_parameters }} \
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
workingDirectory: $(Build.SourcesDirectory)
@@ -266,7 +266,7 @@ stages:
--skip_submodule_sync \
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
--enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
${{ parameters.build_py_parameters }} --ctest_path '' \
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
@@ -810,25 +810,25 @@ stages:
buildArch: x64
strategy:
matrix:
Python36_cuda:
Python36_GPU:
PythonVersion: '3.6'
EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_cuda_11.bat
EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_gpu.bat
EP_NAME: gpu
Python37_cuda:
Python37_GPU:
PythonVersion: '3.7'
EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_cuda_11.bat
EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_gpu.bat
EP_NAME: gpu
Python38_cuda:
Python38_GPU:
PythonVersion: '3.8'
EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_cuda_11.bat
EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_gpu.bat
EP_NAME: gpu
Python39_cuda:
Python39_GPU:
PythonVersion: '3.9'
EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_cuda_11.bat
EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
EnvSetupScript: setup_env_gpu.bat
EP_NAME: gpu
Python36_dml:
PythonVersion: '3.6'