Mirror of https://github.com/saymrwulf/onnxruntime.git (synced 2026-05-14 20:48:00 +00:00)
Integrate TensorRT into GPU Python package (#9785)
* add use_tensorrt build option
* add use_tensorrt to the test runs
* add use_tensorrt for Windows
* make the TRT EP skip the backend tests
* fix bugs
* add/modify descriptions
* modify for debugging
* switch pool to test
* add verbosity
* refine the code
* fix flake8 warning
* add a pre-load check for TRT and add the cupti lib to the CUDA dependencies
* modify the script so the TRT build path matches the CUDA one
* show an error message when the user wants to run TensorRT but TensorRT is not installed in the env
* add the TRT lib for manylinux
* include cuda_dependencies for TRT
* rewrite the condition to throw an exception
* make the code more compact
Parent: 76715ad525
Commit: 7242627fec
6 changed files with 209 additions and 167 deletions
@@ -107,7 +107,11 @@ class OnnxRuntimeBackend(Backend):
             for k, v in kwargs.items():
                 if hasattr(options, k):
                     setattr(options, k, v)
-            inf = InferenceSession(model, sess_options=options, providers=get_available_providers())
+
+            excluded_providers = os.getenv('ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS', default="").split(',')
+            providers = [x for x in get_available_providers() if (x not in excluded_providers)]
+
+            inf = InferenceSession(model, sess_options=options, providers=providers)
             # backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
             # which may hide test failures.
             inf.disable_fallback()
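For context, a minimal sketch (not part of the diff) of how this exclusion hook behaves from user code; the model path is a placeholder and only public onnxruntime API is used:

import os
import onnxruntime as ort

# Hide the TensorRT EP the same way the backend test driver does;
# an empty or unset variable leaves the provider list untouched.
os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"

excluded = os.getenv("ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS", default="").split(",")
providers = [p for p in ort.get_available_providers() if p not in excluded]
sess = ort.InferenceSession("model.onnx", providers=providers)  # placeholder model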
@@ -352,151 +352,159 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
           ->CreateProvider();
   } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
+    // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case
+    // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
+    if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) {
       std::string calibration_table, cache_path, lib_path;
       auto it = provider_options_map.find(type);
       if (it != provider_options_map.end()) {
         OrtTensorRTProviderOptions params{
             0,
             0,
             nullptr,
             1000,
             1,
             1 << 30,
             0,
             0,
             nullptr,
             0,
             0,
             0,
             0,
             0,
             nullptr,
             0,
             nullptr,
             0};
         for (auto option : it->second) {
           if (option.first == "device_id") {
             if (!option.second.empty()) {
               params.device_id = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number i.e. '0'.\n");
             }
           } else if (option.first == "trt_max_partition_iterations") {
             if (!option.second.empty()) {
               params.trt_max_partition_iterations = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a positive integer number i.e. '1000'.\n");
             }
           } else if (option.first == "trt_min_subgraph_size") {
             if (!option.second.empty()) {
               params.trt_min_subgraph_size = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a positive integer number i.e. '1'.\n");
             }
           } else if (option.first == "trt_max_workspace_size") {
             if (!option.second.empty()) {
               params.trt_max_workspace_size = std::stoull(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
             }
           } else if (option.first == "trt_fp16_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_fp16_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_fp16_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_int8_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_int8_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_int8_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_int8_calibration_table_name") {
             if (!option.second.empty()) {
               calibration_table = option.second;
               params.trt_int8_calibration_table_name = calibration_table.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
             }
           } else if (option.first == "trt_int8_use_native_calibration_table") {
             if (option.second == "True" || option.second == "true") {
               params.trt_int8_use_native_calibration_table = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_int8_use_native_calibration_table = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_dla_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_dla_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_dla_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_dla_core") {
             if (!option.second.empty()) {
               params.trt_dla_core = std::stoi(option.second);
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a positive integer number i.e. '0'.\n");
             }
           } else if (option.first == "trt_dump_subgraphs") {
             if (option.second == "True" || option.second == "true") {
               params.trt_dump_subgraphs = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_dump_subgraphs = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_cache_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_engine_cache_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_engine_cache_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_cache_path") {
             if (!option.second.empty()) {
               cache_path = option.second;
               params.trt_engine_cache_path = cache_path.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a path string i.e. 'engine_cache'.\n");
             }
           } else if (option.first == "trt_engine_decryption_enable") {
             if (option.second == "True" || option.second == "true") {
               params.trt_engine_decryption_enable = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_engine_decryption_enable = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else if (option.first == "trt_engine_decryption_lib_path") {
             if (!option.second.empty()) {
               lib_path = option.second;
               params.trt_engine_decryption_lib_path = lib_path.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a path string i.e. 'decryption_lib'.\n");
             }
           } else if (option.first == "trt_force_sequential_engine_build") {
             if (option.second == "True" || option.second == "true") {
               params.trt_force_sequential_engine_build = true;
             } else if (option.second == "False" || option.second == "false") {
               params.trt_force_sequential_engine_build = false;
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
             }
           } else {
             ORT_THROW("Invalid TensorRT EP option: ", option.first);
           }
         }
         return onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)->CreateProvider();
       } else {
         return onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id)->CreateProvider();
       }
+    } else {
+      if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
+        ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html#requirements) as well as TensorRT as mentioned in the TensorRT requirements page (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.");
+      }
+    }
 #endif
   } else if (type == kMIGraphXExecutionProvider) {
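The option keys parsed above map one-to-one onto what a Python caller can pass at session creation. A hedged sketch (the model path is a placeholder; values are strings because that is how the pybind layer receives them, as the std::stoi and "True"/"False" parsing above shows):

import onnxruntime as ort

trt_options = {
    "device_id": "0",
    "trt_max_workspace_size": "1073741824",  # 1 << 30 bytes, matching the default above
    "trt_fp16_enable": "True",
    "trt_engine_cache_enable": "True",
    "trt_engine_cache_path": "engine_cache",
}
sess = ort.InferenceSession(
    "model.onnx",  # placeholder
    providers=[("TensorrtExecutionProvider", trt_options), "CUDAExecutionProvider"],
)

Any unrecognized key lands in the final else branch and raises "Invalid TensorRT EP option".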
@@ -122,6 +122,9 @@ def create_backend_test(testname=None):
         backend_test.exclude('(' + '|'.join(filters) + ')')
         print('excluded tests:', filters)
 
+        # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior
+        os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider"
+
     # import all test cases at global scope to make
     # them visible to python.unittest.
     globals().update(backend_test.enable_report().test_cases)
setup.py (37 lines changed)
@@ -52,9 +52,7 @@ cuda_version = None
 rocm_version = None
 is_rocm = False
 
-# The following arguments are mutually exclusive
-if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'):
-    package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly'
-elif wheel_name_suffix == 'gpu':
+if wheel_name_suffix == 'gpu':
     # TODO: how to support multiple CUDA versions?
     cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=')
 elif parse_arg_remove_boolean(sys.argv, '--use_rocm'):
@@ -135,6 +133,17 @@ try:
                     f.write('    import os\n')
                     f.write('    os.environ["ORT_CUDA_UNAVAILABLE"] = "1"\n')
 
+        def _rewrite_ld_preload_tensorrt(self, to_preload):
+            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
+                if len(to_preload) > 0:
+                    f.write('from ctypes import CDLL, RTLD_GLOBAL\n')
+                    f.write('try:\n')
+                    for library in to_preload:
+                        f.write('    _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library))
+                    f.write('except OSError:\n')
+                    f.write('    import os\n')
+                    f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
+
         def run(self):
             if is_manylinux:
                 source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
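Given the f.write calls above, the snippet appended to _ld_preload.py would read as follows for a hypothetical to_preload of ["libnvinfer.so.8"] (the soname is illustrative, derived from library.split('.')[0]):

from ctypes import CDLL, RTLD_GLOBAL
try:
    # Preload the stripped dependency so the provider bridge can resolve it.
    _libnvinfer = CDLL("libnvinfer.so.8", mode=RTLD_GLOBAL)
except OSError:
    import os
    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"

This is the other half of the ORT_TENSORRT_UNAVAILABLE handshake: the C++ branch earlier in this commit skips loading TensorRT whenever this except path has fired.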
@@ -147,6 +156,8 @@ try:
                                      'libhsa-runtime64.so', 'libhsakmt.so']
                 to_preload = []
                 to_preload_cuda = []
+                to_preload_tensorrt = []
+                cuda_dependencies = []
                 args = ['patchelf', '--debug']
                 for line in result.stdout.split('\n'):
                     for dependency in dependencies:
@@ -162,7 +173,7 @@ try:
                 result = subprocess.run(['patchelf', '--print-needed', dest],
                                         check=True, stdout=subprocess.PIPE, universal_newlines=True)
                 cuda_dependencies = ['libcublas.so', 'libcublasLt.so', 'libcudnn.so', 'libcudart.so',
-                                     'libcurand.so', 'libcufft.so', 'libnvToolsExt.so']
+                                     'libcurand.so', 'libcufft.so', 'libnvToolsExt.so', 'libcupti.so']
                 rocm_dependencies = ['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so',
                                      'libhsa-runtime64.so', 'libhsakmt.so']
                 args = ['patchelf', '--debug']
@@ -176,8 +187,25 @@ try:
                 if len(args) > 3:
                     subprocess.run(args, check=True, stdout=subprocess.PIPE)
 
+                dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
+                if path.isfile(dest):
+                    result = subprocess.run(['patchelf', '--print-needed', dest],
+                                            check=True, stdout=subprocess.PIPE, universal_newlines=True)
+                    tensorrt_dependencies = ['libnvinfer.so', 'libnvinfer_plugin.so', 'libnvonnxparser.so']
+                    args = ['patchelf', '--debug']
+                    for line in result.stdout.split('\n'):
+                        for dependency in (cuda_dependencies + tensorrt_dependencies):
+                            if dependency in line:
+                                if dependency not in (to_preload + to_preload_cuda):
+                                    to_preload_tensorrt.append(line)
+                                args.extend(['--remove-needed', line])
+                    args.append(dest)
+                    if len(args) > 3:
+                        subprocess.run(args, check=True, stdout=subprocess.PIPE)
+
                 self._rewrite_ld_preload(to_preload)
                 self._rewrite_ld_preload_cuda(to_preload_cuda)
+                self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
                 _bdist_wheel.run(self)
                 if is_manylinux and not disable_auditwheel_repair:
                     file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
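Unrolled, the args list built in this block amounts to a single patchelf call per wheel build. A sketch assuming patchelf reported one dependency line, "libnvinfer.so.8" (the exact soname depends on the installed TensorRT version):

import subprocess

# Strip the DT_NEEDED entry from the TensorRT provider bridge so auditwheel
# does not graft the TensorRT libraries into the wheel; they are loaded at
# import time via _ld_preload instead.
subprocess.run(
    ["patchelf", "--debug",
     "--remove-needed", "libnvinfer.so.8",
     "onnxruntime/capi/libonnxruntime_providers_tensorrt.so"],
    check=True, stdout=subprocess.PIPE,
)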
@@ -202,6 +230,7 @@ if platform.system() == 'Linux':
                 'mimalloc.so']
    dl_libs = ['libonnxruntime_providers_shared.so']
    dl_libs.append(providers_cuda_or_rocm)
+   dl_libs.append('libonnxruntime_providers_tensorrt.so')
    # DNNL, TensorRT & OpenVINO EPs are built as shared libs
    libs.extend(['libonnxruntime_providers_shared.so'])
    libs.extend(['libonnxruntime_providers_dnnl.so'])
@@ -1715,9 +1715,7 @@ def build_python_wheel(
         args.append("--disable_auditwheel_repair")
 
-        # The following arguments are mutually exclusive
-        if use_tensorrt:
-            args.append('--use_tensorrt')
-        elif use_cuda:
+        if use_cuda:
+            # The following line assumes no other EP is enabled
             args.append('--wheel_name_suffix=gpu')
             if cuda_version:
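With the separate --use_tensorrt wheel gone, the single GPU wheel carries both EPs and the choice moves to session creation. A usage sketch (placeholder model path):

import onnxruntime as ort

# Providers are listed in priority order; ORT falls back down the list
# for nodes the TensorRT EP cannot take.
sess = ort.InferenceSession(
    "model.onnx",  # placeholder
    providers=["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"],
)
print(sess.get_providers())  # shows which EPs were actually registered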
@@ -210,10 +210,10 @@ stages:
 
       - template: get-docker-image-steps.yml
         parameters:
-          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_4_tensorrt8_0
           Context: tools/ci_build/github/linux/docker
-          DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvidia/cuda:11.4.0-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
-          Repository: onnxruntimecuda11build
+          DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-10/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
+          Repository: onnxruntimecuda114xtrt80build
 
       - task: CmdLine@2
         displayName: 'Build Python Wheel'
@@ -228,14 +228,14 @@ stages:
               --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
               -e NIGHTLY_BUILD \
               -e BUILD_BUILDNUMBER \
-              onnxruntimecuda11build \
+              onnxruntimecuda114xtrt80build \
               $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
               --build_dir /build --cmake_generator Ninja \
               --config Release --update --build \
               --skip_submodule_sync \
               --parallel \
               --build_wheel \
-              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+              --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
               ${{ parameters.build_py_parameters }} \
               --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
           workingDirectory: $(Build.SourcesDirectory)
@@ -266,7 +266,7 @@ stages:
               --skip_submodule_sync \
               --parallel \
               --build_wheel \
-              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
+              --enable_onnx_tests --use_tensorrt --cuda_version=11.4 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
               ${{ parameters.build_py_parameters }} --ctest_path '' \
               --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
@@ -810,25 +810,25 @@ stages:
       buildArch: x64
     strategy:
       matrix:
-        Python36_cuda:
+        Python36_GPU:
           PythonVersion: '3.6'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python37_cuda:
+        Python37_GPU:
           PythonVersion: '3.7'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python38_cuda:
+        Python38_GPU:
           PythonVersion: '3.8'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
-        Python39_cuda:
+        Python39_GPU:
           PythonVersion: '3.9'
-          EpBuildFlags: --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
-          EnvSetupScript: setup_env_cuda_11.bat
+          EpBuildFlags: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.0.3.4.Windows10.x86_64.cuda-11.3.cudnn8.2" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
+          EnvSetupScript: setup_env_gpu.bat
+          EP_NAME: gpu
         Python36_dml:
           PythonVersion: '3.6'