diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py
index 8736c9e275..fffbd51f0f 100644
--- a/onnxruntime/python/backend/backend.py
+++ b/onnxruntime/python/backend/backend.py
@@ -107,7 +107,11 @@ class OnnxRuntimeBackend(Backend):
         for k, v in kwargs.items():
             if hasattr(options, k):
                 setattr(options, k, v)
-        inf = InferenceSession(model, sess_options=options, providers=get_available_providers())
+
+        excluded_providers = os.getenv('ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS', default="").split(',')
+        providers = [x for x in get_available_providers() if (x not in excluded_providers)]
+
+        inf = InferenceSession(model, sess_options=options, providers=providers)
         # backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
         # which may hide test failures.
         inf.disable_fallback()
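
Reviewer note: with this change the backend module reads ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS at session-creation time, so callers can steer provider selection without any API change. A minimal usage sketch (the model path and exclusion list are illustrative, not part of this patch):

    import os
    # Comma-separated list; names must match ONNX Runtime provider identifiers.
    os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"

    import onnx
    import onnxruntime.backend as backend

    model = onnx.load("model.onnx")  # hypothetical model file
    rep = backend.prepare(model, device="GPU")  # the excluded EP is skipped
    # outputs = rep.run(inputs)
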
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 65e86288a5..ea198fb45d 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -352,151 +352,159 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
                          ->CreateProvider();
   } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-    std::string calibration_table, cache_path, lib_path;
-    auto it = provider_options_map.find(type);
-    if (it != provider_options_map.end()) {
-      OrtTensorRTProviderOptions params{
-          0,
-          0,
-          nullptr,
-          1000,
-          1,
-          1 << 30,
-          0,
-          0,
-          nullptr,
-          0,
-          0,
-          0,
-          0,
-          0,
-          nullptr,
-          0,
-          nullptr,
-          0};
-      for (auto option : it->second) {
-        if (option.first == "device_id") {
-          if (!option.second.empty()) {
-            params.device_id = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
-          }
-        } else if (option.first == "trt_max_partition_iterations") {
-          if (!option.second.empty()) {
-            params.trt_max_partition_iterations = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
-          }
-        } else if (option.first == "trt_min_subgraph_size") {
-          if (!option.second.empty()) {
-            params.trt_min_subgraph_size = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
-          }
-        } else if (option.first == "trt_max_workspace_size") {
-          if (!option.second.empty()) {
-            params.trt_max_workspace_size = std::stoull(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
-          }
-        } else if (option.first == "trt_fp16_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_fp16_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_fp16_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_int8_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_int8_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_int8_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_int8_calibration_table_name") {
-          if (!option.second.empty()) {
-            calibration_table = option.second;
-            params.trt_int8_calibration_table_name = calibration_table.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
-          }
-        } else if (option.first == "trt_int8_use_native_calibration_table") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_int8_use_native_calibration_table = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_int8_use_native_calibration_table = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_dla_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_dla_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_dla_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_dla_core") {
-          if (!option.second.empty()) {
-            params.trt_dla_core = std::stoi(option.second);
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
-          }
-        } else if (option.first == "trt_dump_subgraphs") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_dump_subgraphs = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_dump_subgraphs = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_cache_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_engine_cache_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_engine_cache_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_cache_path") {
-          if (!option.second.empty()) {
-            cache_path = option.second;
-            params.trt_engine_cache_path = cache_path.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
-          }
-        } else if (option.first == "trt_engine_decryption_enable") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_engine_decryption_enable = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_engine_decryption_enable = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else if (option.first == "trt_engine_decryption_lib_path") {
-          if (!option.second.empty()) {
-            lib_path = option.second;
-            params.trt_engine_decryption_lib_path = lib_path.c_str();
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
-          }
-        } else if (option.first == "trt_force_sequential_engine_build") {
-          if (option.second == "True" || option.second == "true") {
-            params.trt_force_sequential_engine_build = true;
-          } else if (option.second == "False" || option.second == "false") {
-            params.trt_force_sequential_engine_build = false;
-          } else {
-            ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
-          }
-        } else {
-          ORT_THROW("Invalid TensorRT EP option: ", option.first);
-        }
-      }
-      return onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)->CreateProvider();
-    } else {
-      return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider();
-    }
'decryption_lib'.\n"); - } - } else if (option.first == "trt_force_sequential_engine_build") { - if (option.second == "True" || option.second == "true") { - params.trt_force_sequential_engine_build = true; - } else if (option.second == "False" || option.second == "false") { - params.trt_force_sequential_engine_build = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case + // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies. + if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) { + std::string calibration_table, cache_path, lib_path; + auto it = provider_options_map.find(type); + if (it != provider_options_map.end()) { + OrtTensorRTProviderOptions params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0}; + for (auto option : it->second) { + if (option.first == "device_id") { + if (!option.second.empty()) { + params.device_id = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n"); + } + } else if (option.first == "trt_max_partition_iterations") { + if (!option.second.empty()) { + params.trt_max_partition_iterations = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n"); + } + } else if (option.first == "trt_min_subgraph_size") { + if (!option.second.empty()) { + params.trt_min_subgraph_size = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n"); + } + } else if (option.first == "trt_max_workspace_size") { + if (!option.second.empty()) { + params.trt_max_workspace_size = std::stoull(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n"); + } + } else if (option.first == "trt_fp16_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_fp16_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_fp16_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_calibration_table_name") { + if (!option.second.empty()) { + calibration_table = option.second; + params.trt_int8_calibration_table_name = calibration_table.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 
'cal_table'.\n"); + } + } else if (option.first == "trt_int8_use_native_calibration_table") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_use_native_calibration_table = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_use_native_calibration_table = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_dla_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_dla_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_dla_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_dla_core") { + if (!option.second.empty()) { + params.trt_dla_core = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n"); + } + } else if (option.first == "trt_dump_subgraphs") { + if (option.second == "True" || option.second == "true") { + params.trt_dump_subgraphs = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_dump_subgraphs = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_cache_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_engine_cache_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_engine_cache_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_cache_path") { + if (!option.second.empty()) { + cache_path = option.second; + params.trt_engine_cache_path = cache_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n"); + } + } else if (option.first == "trt_engine_decryption_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_engine_decryption_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_engine_decryption_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_engine_decryption_lib_path") { + if (!option.second.empty()) { + lib_path = option.second; + params.trt_engine_decryption_lib_path = lib_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 
'decryption_lib'.\n"); + } + } else if (option.first == "trt_force_sequential_engine_build") { + if (option.second == "True" || option.second == "true") { + params.trt_force_sequential_engine_build = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_force_sequential_engine_build = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else { + ORT_THROW("Invalid TensorRT EP option: ", option.first); + } } + return onnxruntime::CreateExecutionProviderFactory_Tensorrt(¶ms)->CreateProvider(); } else { - ORT_THROW("Invalid TensorRT EP option: ", option.first); + return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider(); } - } - return onnxruntime::CreateExecutionProviderFactory_Tensorrt(¶ms)->CreateProvider(); } else { - return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider(); + if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) { + ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements) as well as TensorRT as mentioned in the TensorRT requirements page (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported."); + } } #endif } else if (type == kMIGraphXExecutionProvider) { diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index 28914dcf6d..9520b885b9 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -122,6 +122,9 @@ def create_backend_test(testname=None): backend_test.exclude('(' + '|'.join(filters) + ')') print('excluded tests:', filters) + # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" + # import all test cases at global scope to make # them visible to python.unittest. globals().update(backend_test.enable_report().test_cases) diff --git a/setup.py b/setup.py index 2d2e4047dd..b28f48fcbd 100644 --- a/setup.py +++ b/setup.py @@ -52,9 +52,7 @@ cuda_version = None rocm_version = None is_rocm = False # The following arguments are mutually exclusive -if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'): - package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly' -elif wheel_name_suffix == 'gpu': +if wheel_name_suffix == 'gpu': # TODO: how to support multiple CUDA versions? 
diff --git a/setup.py b/setup.py
index 2d2e4047dd..b28f48fcbd 100644
--- a/setup.py
+++ b/setup.py
@@ -52,9 +52,7 @@ cuda_version = None
 rocm_version = None
 is_rocm = False
 # The following arguments are mutually exclusive
-if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'):
-    package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly'
-elif wheel_name_suffix == 'gpu':
+if wheel_name_suffix == 'gpu':
     # TODO: how to support multiple CUDA versions?
     cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=')
 elif parse_arg_remove_boolean(sys.argv, '--use_rocm'):
@@ -135,6 +133,17 @@ try:
                     f.write('    import os\n')
                     f.write('    os.environ["ORT_CUDA_UNAVAILABLE"] = "1"\n')
 
+        def _rewrite_ld_preload_tensorrt(self, to_preload):
+            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
+                if len(to_preload) > 0:
+                    f.write('from ctypes import CDLL, RTLD_GLOBAL\n')
+                    f.write('try:\n')
+                    for library in to_preload:
+                        f.write('    _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library))
+                    f.write('except OSError:\n')
+                    f.write('    import os\n')
+                    f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
+
         def run(self):
             if is_manylinux:
                 source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
@@ -147,6 +156,8 @@ try:
                                 'libhsa-runtime64.so', 'libhsakmt.so']
                 to_preload = []
                 to_preload_cuda = []
+                to_preload_tensorrt = []
+                cuda_dependencies = []
                 args = ['patchelf', '--debug']
                 for line in result.stdout.split('\n'):
                     for dependency in dependencies:
@@ -162,7 +173,7 @@ try:
                     result = subprocess.run(['patchelf', '--print-needed', dest],
                                             check=True, stdout=subprocess.PIPE, universal_newlines=True)
                     cuda_dependencies = ['libcublas.so', 'libcublasLt.so', 'libcudnn.so', 'libcudart.so',
-                                         'libcurand.so', 'libcufft.so', 'libnvToolsExt.so']
+                                         'libcurand.so', 'libcufft.so', 'libnvToolsExt.so', 'libcupti.so']
                     rocm_dependencies = ['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so',
                                          'libhsa-runtime64.so', 'libhsakmt.so']
                     args = ['patchelf', '--debug']
@@ -176,8 +187,25 @@ try:
                     if len(args) > 3:
                         subprocess.run(args, check=True, stdout=subprocess.PIPE)
 
+                dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
+                if path.isfile(dest):
+                    result = subprocess.run(['patchelf', '--print-needed', dest],
+                                            check=True, stdout=subprocess.PIPE, universal_newlines=True)
+                    tensorrt_dependencies = ['libnvinfer.so', 'libnvinfer_plugin.so', 'libnvonnxparser.so']
+                    args = ['patchelf', '--debug']
+                    for line in result.stdout.split('\n'):
+                        for dependency in (cuda_dependencies + tensorrt_dependencies):
+                            if dependency in line:
+                                if dependency not in (to_preload + to_preload_cuda):
+                                    to_preload_tensorrt.append(line)
+                                args.extend(['--remove-needed', line])
+                    args.append(dest)
+                    if len(args) > 3:
+                        subprocess.run(args, check=True, stdout=subprocess.PIPE)
+
                 self._rewrite_ld_preload(to_preload)
                 self._rewrite_ld_preload_cuda(to_preload_cuda)
+                self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
             _bdist_wheel.run(self)
             if is_manylinux and not disable_auditwheel_repair:
                 file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
@@ -202,6 +230,7 @@ if platform.system() == 'Linux':
             'mimalloc.so']
     dl_libs = ['libonnxruntime_providers_shared.so']
    
     dl_libs.append(providers_cuda_or_rocm)
+    dl_libs.append('libonnxruntime_providers_tensorrt.so')
     # DNNL, TensorRT & OpenVINO EPs are built as shared libs
     libs.extend(['libonnxruntime_providers_shared.so'])
     libs.extend(['libonnxruntime_providers_dnnl.so'])
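
For reference, the block appended to _ld_preload.py is generated from the f.write template in _rewrite_ld_preload_tensorrt above. Assuming to_preload_tensorrt ended up as, say, ['libnvinfer.so.8'] (a hypothetical entry), the emitted code would read:

    from ctypes import CDLL, RTLD_GLOBAL
    try:
        # Preload the TensorRT library globally so the provider can resolve its symbols.
        _libnvinfer = CDLL("libnvinfer.so.8", mode=RTLD_GLOBAL)
    except OSError:
        import os
        os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"

This is the counterpart of the ORT_TENSORRT_UNAVAILABLE check added to onnxruntime_pybind_state.cc: if the auditwheel-stripped TensorRT libraries cannot be loaded, the EP is skipped instead of crashing the import.
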
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 3e62c460b8..6797638b08 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1715,9 +1715,7 @@ def build_python_wheel(
         args.append("--disable_auditwheel_repair")
 
     # The following arguments are mutually exclusive
-    if use_tensorrt:
-        args.append('--use_tensorrt')
-    elif use_cuda:
+    if use_cuda:
         # The following line assumes no other EP is enabled
         args.append('--wheel_name_suffix=gpu')
         if cuda_version:
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
index 2c8680ea26..8f3fd62dfd 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
@@ -210,10 +210,10 @@ stages:
     - template: get-docker-image-steps.yml
       parameters:
-        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_4_tensorrt8_0
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvidia/cuda:11.4.0-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
-        Repository: onnxruntimecuda11build
+        DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
+        Repository: onnxruntimecuda114xtrt80build
 
     - task: CmdLine@2
       displayName: 'Build Python Wheel'
@@ -228,14 +228,14 @@ stages:
               --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
               -e NIGHTLY_BUILD \
               -e BUILD_BUILDNUMBER \
-              onnxruntimecuda11build \
+              onnxruntimecuda114xtrt80build \
               $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                 --build_dir /build --cmake_generator Ninja \
                 --config Release --update --build \
                 --skip_submodule_sync \
                 --parallel \
                 --build_wheel \
-                --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+                --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
                 ${{ parameters.build_py_parameters }} \
                 --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
           workingDirectory: $(Build.SourcesDirectory)
@@ -266,7 +266,7 @@ stages:
                 --skip_submodule_sync \
                 --parallel \
                 --build_wheel \
-                --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+                --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
                 ${{ parameters.build_py_parameters }} --ctest_path '' \
                 --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
@@ -810,25 +810,25 @@ stages:
       buildArch: x64
       strategy:
         matrix:
-          Python36_cuda:
+          Python36_GPU:
             PythonVersion: '3.6'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python37_cuda:
+          Python37_GPU:
             PythonVersion: '3.7'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python38_cuda:
+          Python38_GPU:
             PythonVersion: '3.8'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
-          Python39_cuda:
+          Python39_GPU:
             PythonVersion: '3.9'
-            EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-            EnvSetupScript: setup_env_cuda_11.bat
+            EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+            EnvSetupScript: setup_env_gpu.bat
             EP_NAME: gpu
           Python36_dml:
             PythonVersion: '3.6'
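
As a quick post-install sanity check for wheels produced by these pipelines, one can confirm that the TensorRT EP registers alongside CUDA (a sketch; the exact list depends on the local CUDA/cuDNN/TensorRT installation):

    import onnxruntime as ort

    # On a correctly configured machine the unified GPU wheel should report, e.g.:
    # ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
    print(ort.get_available_providers())
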