Mirror of https://github.com/saymrwulf/onnxruntime.git (synced 2026-05-14 20:48:00 +00:00)
Integrate TensorRT into GPU Python package (#9785)
* add use_tensorrt build option
* add use_tensorrt to the test runs
* add use_tensorrt for Windows
* make the TRT EP skip the backend tests
* fix bugs
* add/modify descriptions
* modify for debugging
* switch pool to test
* add verbosity
* refine the code
* fix flake8 warning
* add a pre-load check for TRT and add the cupti lib to the CUDA dependencies
* modify the script so the TRT build path matches the CUDA one
* show an error message when the user wants to run TensorRT but TensorRT is not installed in the env
* add the TRT lib for manylinux
* include cuda_dependencies for TRT
* rewrite the condition to throw an exception
* make the code more compact
Parent: 76715ad525
Commit: 7242627fec
6 changed files with 209 additions and 167 deletions
@@ -107,7 +107,11 @@ class OnnxRuntimeBackend(Backend):
             for k, v in kwargs.items():
                 if hasattr(options, k):
                     setattr(options, k, v)
-            inf = InferenceSession(model, sess_options=options, providers=get_available_providers())
+
+            excluded_providers = os.getenv('ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS', default="").split(',')
+            providers = [x for x in get_available_providers() if (x not in excluded_providers)]
+
+            inf = InferenceSession(model, sess_options=options, providers=providers)
             # backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
             # which may hide test failures.
             inf.disable_fallback()
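For context, a minimal sketch (not part of the diff) of how this exclusion hook behaves from user code; the model path is a placeholder and only public onnxruntime API is used:

import os
import onnxruntime as ort

# Hide the TensorRT EP the same way the backend test driver does;
# an empty or unset variable leaves the provider list untouched.
os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"

excluded = os.getenv("ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS", default="").split(",")
providers = [p for p in ort.get_available_providers() if p not in excluded]
sess = ort.InferenceSession("model.onnx", providers=providers)  # placeholder model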
@@ -352,151 +352,159 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
           ->CreateProvider();
   } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
+    // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case
+    // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
+    if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) {
       std::string calibration_table, cache_path, lib_path;
       auto it = provider_options_map.find(type);
       if (it != provider_options_map.end()) {
         OrtTensorRTProviderOptions params{
             0,
             0,
             nullptr,
             1000,
             1,
             1 << 30,
             0,
             0,
             nullptr,
             0,
             0,
             0,
             0,
             0,
             nullptr,
             0,
             nullptr,
             0};
         for (auto option : it->second) {
           if (option.first == "device_id") {
             if (!option.second.empty()) {
               params.device_id = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
             }
           } else if (option.first == "trt_max_partition_iterations") {
             if (!option.second.empty()) {
               params.trt_max_partition_iterations = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
             }
           } else if (option.first == "trt_min_subgraph_size") {
             if (!option.second.empty()) {
               params.trt_min_subgraph_size = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
             }
           } else if (option.first == "trt_max_workspace_size") {
             if (!option.second.empty()) {
               params.trt_max_workspace_size = std::stoull(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
             }
           } else if (option.first == "trt_fp16_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_fp16_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_fp16_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_int8_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_int8_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_int8_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_int8_calibration_table_name") {
             if (!option.second.empty()) {
               calibration_table = option.second;
               params.trt_int8_calibration_table_name = calibration_table.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
             }
           } else if (option.first == "trt_int8_use_native_calibration_table") {
             if (option.second == "True" || option.second == "true") {
               params.trt_int8_use_native_calibration_table = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_int8_use_native_calibration_table = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_dla_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_dla_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_dla_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_dla_core") {
             if (!option.second.empty()) {
               params.trt_dla_core = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
             }
           } else if (option.first == "trt_dump_subgraphs") {
             if (option.second == "True" || option.second == "true") {
               params.trt_dump_subgraphs = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_dump_subgraphs = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_cache_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_engine_cache_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_engine_cache_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_cache_path") {
             if (!option.second.empty()) {
               cache_path = option.second;
               params.trt_engine_cache_path = cache_path.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
             }
           } else if (option.first == "trt_engine_decryption_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_engine_decryption_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_engine_decryption_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_decryption_lib_path") {
             if (!option.second.empty()) {
               lib_path = option.second;
               params.trt_engine_decryption_lib_path = lib_path.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
             }
           } else if (option.first == "trt_force_sequential_engine_build") {
             if (option.second == "True" || option.second == "true") {
               params.trt_force_sequential_engine_build = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_force_sequential_engine_build = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else {
             ORT_THROW("Invalid TensorRT EP option: ", option.first);
           }
         }
         return onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)->CreateProvider();
       } else {
         return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider();
       }
+    } else {
+      if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
+        ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements) as well as TensorRT as mentioned in the TensorRT requirements page (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.");
+      }
+    }
 #endif
   } else if (type == kMIGraphXExecutionProvider) {
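The option keys parsed above map one-to-one onto what a Python caller can pass at session creation. A hedged sketch (the model path is a placeholder; values are strings because that is how the pybind layer receives them, as the std::stoi and "True"/"False" parsing above shows):

import onnxruntime as ort

trt_options = {
    "device_id": "0",
    "trt_max_workspace_size": "1073741824",  # 1 << 30 bytes, matching the default above
    "trt_fp16_enable": "True",
    "trt_engine_cache_enable": "True",
    "trt_engine_cache_path": "engine_cache",
}
sess = ort.InferenceSession(
    "model.onnx",  # placeholder
    providers=[("TensorrtExecutionProvider", trt_options), "CUDAExecutionProvider"],
)

Any unrecognized key lands in the final else branch and raises "Invalid TensorRT EP option".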
@@ -122,6 +122,9 @@ def create_backend_test(testname=None):
         backend_test.exclude('(' + '|'.join(filters) + ')')
         print('excluded tests:', filters)
 
+        # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior
+        os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"
+
     # import all test cases at global scope to make
     # them visible to python.unittest.
     globals().update(backend_test.enable_report().test_cases)
setup.py (37 lines changed)
@@ -52,9 +52,7 @@ cuda_version = None
 rocm_version = None
 is_rocm = False
 
-# The following arguments are mutually exclusive
-if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'):
-    package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly'
-elif wheel_name_suffix == 'gpu':
+if wheel_name_suffix == 'gpu':
     # TODO: how to support multiple CUDA versions?
     cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=')
 elif parse_arg_remove_boolean(sys.argv, '--use_rocm'):
@@ -135,6 +133,17 @@ try:
                     f.write('    import os\n')
                     f.write('    os.environ["ORT_CUDA_UNAVAILABLE"] = "1"\n')
 
+        def _rewrite_ld_preload_tensorrt(self, to_preload):
+            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
+                if len(to_preload) > 0:
+                    f.write('from ctypes import CDLL, RTLD_GLOBAL\n')
+                    f.write('try:\n')
+                    for library in to_preload:
+                        f.write('    _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library))
+                    f.write('except OSError:\n')
+                    f.write('    import os\n')
+                    f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
+
         def run(self):
             if is_manylinux:
                 source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
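Given the f.write calls above, the snippet appended to _ld_preload.py would read as follows for a hypothetical to_preload of ["libnvinfer.so.8"] (the soname is illustrative, derived from library.split('.')[0]):

from ctypes import CDLL, RTLD_GLOBAL
try:
    # Preload the stripped dependency so the provider bridge can resolve it.
    _libnvinfer = CDLL("libnvinfer.so.8", mode=RTLD_GLOBAL)
except OSError:
    import os
    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"

This is the other half of the ORT_TENSORRT_UNAVAILABLE handshake: the C++ branch earlier in this commit skips loading TensorRT whenever this except path has fired.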
@@ -147,6 +156,8 @@ try:
                                      'libhsa-runtime64.so', 'libhsakmt.so']
                 to_preload = []
                 to_preload_cuda = []
+                to_preload_tensorrt = []
+                cuda_dependencies = []
                 args = ['patchelf', '--debug']
                 for line in result.stdout.split('\n'):
                     for dependency in dependencies:
@@ -162,7 +173,7 @@ try:
                 result = subprocess.run(['patchelf', '--print-needed', dest],
                                         check=True, stdout=subprocess.PIPE, universal_newlines=True)
                 cuda_dependencies = ['libcublas.so', 'libcublasLt.so', 'libcudnn.so', 'libcudart.so',
-                                     'libcurand.so', 'libcufft.so', 'libnvToolsExt.so']
+                                     'libcurand.so', 'libcufft.so', 'libnvToolsExt.so', 'libcupti.so']
                 rocm_dependencies = ['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so',
                                      'libhsa-runtime64.so', 'libhsakmt.so']
                 args = ['patchelf', '--debug']
@@ -176,8 +187,25 @@ try:
                 if len(args) > 3:
                     subprocess.run(args, check=True, stdout=subprocess.PIPE)
 
+                dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
+                if path.isfile(dest):
+                    result = subprocess.run(['patchelf', '--print-needed', dest],
+                                            check=True, stdout=subprocess.PIPE, universal_newlines=True)
+                    tensorrt_dependencies = ['libnvinfer.so', 'libnvinfer_plugin.so', 'libnvonnxparser.so']
+                    args = ['patchelf', '--debug']
+                    for line in result.stdout.split('\n'):
+                        for dependency in (cuda_dependencies + tensorrt_dependencies):
+                            if dependency in line:
+                                if dependency not in (to_preload + to_preload_cuda):
+                                    to_preload_tensorrt.append(line)
+                                args.extend(['--remove-needed', line])
+                    args.append(dest)
+                    if len(args) > 3:
+                        subprocess.run(args, check=True, stdout=subprocess.PIPE)
+
                 self._rewrite_ld_preload(to_preload)
                 self._rewrite_ld_preload_cuda(to_preload_cuda)
+                self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
                 _bdist_wheel.run(self)
                 if is_manylinux and not disable_auditwheel_repair:
                     file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
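Unrolled, the args list built in this block amounts to a single patchelf call per wheel build. A sketch assuming patchelf reported one dependency line, "libnvinfer.so.8" (the exact soname depends on the installed TensorRT version):

import subprocess

# Strip the DT_NEEDED entry from the TensorRT provider bridge so auditwheel
# does not graft the TensorRT libraries into the wheel; they are loaded at
# import time via _ld_preload instead.
subprocess.run(
    ["patchelf", "--debug",
     "--remove-needed", "libnvinfer.so.8",
     "onnxruntime/capi/libonnxruntime_providers_tensorrt.so"],
    check=True, stdout=subprocess.PIPE,
)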
@@ -202,6 +230,7 @@ if platform.system() == 'Linux':
                 'mimalloc.so']
    dl_libs = ['libonnxruntime_providers_shared.so']
    dl_libs.append(providers_cuda_or_rocm)
+   dl_libs.append('libonnxruntime_providers_tensorrt.so')
    # DNNL, TensorRT & OpenVINO EPs are built as shared libs
    libs.extend(['libonnxruntime_providers_shared.so'])
    libs.extend(['libonnxruntime_providers_dnnl.so'])
@@ -1715,9 +1715,7 @@ def build_python_wheel(
         args.append("--disable_auditwheel_repair")
 
-        # The following arguments are mutually exclusive
-        if use_tensorrt:
-            args.append('--use_tensorrt')
-        elif use_cuda:
+        if use_cuda:
+            # The following line assumes no other EP is enabled
             args.append('--wheel_name_suffix=gpu')
             if cuda_version:
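With the separate --use_tensorrt wheel gone, the single GPU wheel carries both EPs and the choice moves to session creation. A usage sketch (placeholder model path):

import onnxruntime as ort

# Providers are listed in priority order; ORT falls back down the list
# for nodes the TensorRT EP cannot take.
sess = ort.InferenceSession(
    "model.onnx",  # placeholder
    providers=["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"],
)
print(sess.get_providers())  # shows which EPs were actually registered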
@@ -210,10 +210,10 @@ stages:
 
       - template: get-docker-image-steps.yml
         parameters:
-          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_4_tensorrt8_0
           Context: tools/ci_build/github/linux/docker
-          DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvidia/cuda:11.4.0-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
-          Repository: onnxruntimecuda11build
+          DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
+          Repository: onnxruntimecuda114xtrt80build
 
       - task: CmdLine@2
         displayName: 'Build Python Wheel'
@@ -228,14 +228,14 @@ stages:
               --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
               -e NIGHTLY_BUILD \
               -e BUILD_BUILDNUMBER \
-              onnxruntimecuda11build \
+              onnxruntimecuda114xtrt80build \
               $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
               --build_dir /build --cmake_generator Ninja \
               --config Release --update --build \
               --skip_submodule_sync \
               --parallel \
               --build_wheel \
-              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+              --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
               ${{ parameters.build_py_parameters }} \
               --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
           workingDirectory: $(Build.SourcesDirectory)
@@ -266,7 +266,7 @@ stages:
               --skip_submodule_sync \
               --parallel \
               --build_wheel \
-              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+              --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
               ${{ parameters.build_py_parameters }} --ctest_path '' \
               --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
@@ -810,25 +810,25 @@ stages:
       buildArch: x64
     strategy:
       matrix:
-        Python36_cuda:
+        Python36_GPU:
           PythonVersion: '3.6'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python37_cuda:
+        Python37_GPU:
           PythonVersion: '3.7'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python38_cuda:
+        Python38_GPU:
           PythonVersion: '3.8'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python39_cuda:
+        Python39_GPU:
           PythonVersion: '3.9'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
         Python36_dml:
           PythonVersion: '3.6'