From 2e38bf5e23a9ec1db2f808532a239a3c5cbe65ae Mon Sep 17 00:00:00 2001 From: stevenlix <38092805+stevenlix@users.noreply.github.com> Date: Tue, 16 Mar 2021 17:16:28 -0700 Subject: [PATCH] add TensorRT configuration to OrtProviderOptions (#6979) * add TensorRT configurations in provider options * Update ort_test_session.cc * Update tensorrt_execution_provider.cc * Update onnxruntime_pybind_state.cc * Update main.cc --- .../core/session/onnxruntime_c_api.h | 12 ++- .../tensorrt/tensorrt_execution_provider.cc | 50 +++++++--- .../tensorrt/tensorrt_execution_provider.h | 6 ++ .../tensorrt/tensorrt_provider_factory.cc | 6 ++ .../python/onnxruntime_pybind_state.cc | 56 ++++++++++- onnxruntime/test/onnx/main.cc | 8 +- .../test/perftest/command_args_parser.cc | 8 ++ onnxruntime/test/perftest/ort_test_session.cc | 96 ++++++++++++++++++- onnxruntime/test/util/default_providers.cc | 2 +- 9 files changed, 221 insertions(+), 23 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e9c0fea093..df0b1c221a 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -289,9 +289,15 @@ typedef struct OrtROCMProviderOptions { /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT /// typedef struct OrtTensorRTProviderOptions { - int device_id; - int has_user_compute_stream; - void* user_compute_stream; + int device_id; // cuda device id. + int has_user_compute_stream; // indicator of user specified CUDA compute stream. + void* user_compute_stream; // user specified CUDA compute stream. + int has_trt_options; // override environment variables with following TensorRT settings at runtime. + size_t trt_max_workspace_size; // maximum workspace size for TensorRT. + int trt_fp16_enable; // enable TensorRT FP16 precision. 
Default 0 = false, nonzero = true + int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true } OrtTensorRTProviderOptions; /// diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 3546609994..b20170ad76 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -404,30 +404,50 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv min_subgraph_size_ = std::stoi(min_subgraph_size_env); } - const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize); - if (!max_workspace_size_env.empty()) { - max_workspace_size_ = std::stoull(max_workspace_size_env); + if (info.has_trt_options) { + max_workspace_size_ = info.max_workspace_size; + } else { + const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize); + if (!max_workspace_size_env.empty()) { + max_workspace_size_ = std::stoull(max_workspace_size_env); + } } - const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable); - if (!fp16_enable_env.empty()) { - fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true); + if (info.has_trt_options) { + fp16_enable_ = info.fp16_enable; + } else { + const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable); + if (!fp16_enable_env.empty()) { + fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? 
false : true); + } } - const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable); - if (!int8_enable_env.empty()) { - int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true); + if (info.has_trt_options) { + int8_enable_ = info.int8_enable; + } else { + const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable); + if (!int8_enable_env.empty()) { + int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true); + } } if (int8_enable_) { - const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName); - if (!int8_calibration_cache_name_env.empty()) { - int8_calibration_cache_name_ = int8_calibration_cache_name_env; + if (info.has_trt_options) { + int8_calibration_cache_name_ = info.int8_calibration_table_name; + } else { + const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName); + if (!int8_calibration_cache_name_env.empty()) { + int8_calibration_cache_name_ = int8_calibration_cache_name_env; + } } - const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable); - if (!int8_use_native_tensorrt_calibration_table_env.empty()) { - int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true); + if (info.has_trt_options) { + int8_use_native_tensorrt_calibration_table_ = info.int8_use_native_calibration_table; + } else { + const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable); + if (!int8_use_native_tensorrt_calibration_table_env.empty()) { + int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? 
false : true); + } } } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 1490a0ffd8..5974139f90 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -71,6 +71,12 @@ struct TensorrtExecutionProviderInfo { int device_id{0}; bool has_user_compute_stream{false}; void* user_compute_stream{nullptr}; + bool has_trt_options{false}; + size_t max_workspace_size{1 << 30}; + bool fp16_enable{false}; + bool int8_enable{false}; + std::string int8_calibration_table_name{""}; + bool int8_use_native_calibration_table{false}; }; // Information to construct kernel function state. diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 66bc8e517f..64b57dfbe6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -49,6 +49,12 @@ struct Tensorrt_Provider : Provider { info.device_id = options.device_id; info.has_user_compute_stream = options.has_user_compute_stream; info.user_compute_stream = options.user_compute_stream; + info.has_trt_options = options.has_trt_options; + info.max_workspace_size = options.trt_max_workspace_size; + info.fp16_enable = options.trt_fp16_enable; + info.int8_enable = options.trt_int8_enable; + info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? 
"" : options.trt_int8_calibration_table_name; + info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table; return std::make_shared(info); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 32bd9bc15f..a7fac4981b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -490,7 +490,61 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector sess->GetSessionOptions().enable_cpu_mem_arena)); } else if (type == kTensorrtExecutionProvider) { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; + std::string trt_int8_calibration_table_name; + auto it = provider_options_map.find(type); + if (it != provider_options_map.end()) { + for (auto option : it->second) { + if (option.first == "has_trt_options") { + if (option.second == "True" || option.second == "true") { + params.has_trt_options = true; + } else if (option.second == "False" || option.second == "false") { + params.has_trt_options = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_max_workspace_size") { + if (!option.second.empty()) { + params.trt_max_workspace_size = std::stoull(option.second); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n"); + } + } else if (option.first == "trt_fp16_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_fp16_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_fp16_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 
'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_enable") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n"); + } + } else if (option.first == "trt_int8_calibration_table_name") { + if (!option.second.empty()) { + trt_int8_calibration_table_name = option.second; + params.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n"); + } + } else if (option.first == "trt_int8_use_native_calibration_table") { + if (option.second == "True" || option.second == "true") { + params.trt_int8_use_native_calibration_table = true; + } else if (option.second == "False" || option.second == "false") { + params.trt_int8_use_native_calibration_table = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. 
Default value is False.\n"); + } + } else { + ORT_THROW("Invalid TensorRT EP option: ", option.first); + } + } + } RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params)); #endif } else if (type == kMIGraphXExecutionProvider) { diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index ac7b3fc734..dd799ac655 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -312,7 +312,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) { OrtTensorRTProviderOptions tensorrt_options{ 0, 0, - nullptr}; + nullptr, + 0, + 1 << 30, + 0, + 0, + nullptr, + 0}; OrtCUDAProviderOptions cuda_options{ 0, diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 3094559b52..013b646061 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -62,6 +62,14 @@ namespace perftest { "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [Usage]: -e -i '| |'\n\n" "\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n" + "\t [TensorRT only] [has_trt_options]: Overrides TensorRT environment variables (if any) with the following settings at runtime.\n" + "\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in bytes.\n" + "\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n" + "\t [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n" + "\t [TensorRT only] [trt_int8_calibration_table_name]: Specify INT8 calibration table name.\n" + "\t [TensorRT only] [trt_int8_use_native_calibration_table]: Use Native TensorRT calibration table.\n" + "\t [Usage]: -e -i '| |'\n\n" + "\t [Example] [For TensorRT EP] -e tensorrt -i 'has_trt_options|true trt_fp16_enable|true trt_int8_enable|true 
trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false'\n" "\t-h: help\n"); } #ifdef _WIN32 diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index fcee88cbdf..7e95baf8d1 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -62,8 +62,100 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kTensorrtExecutionProvider) { #ifdef USE_TENSORRT - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0)); - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); + bool has_trt_options = false; + size_t trt_max_workspace_size = 1 << 30; + bool trt_fp16_enable = false; + bool trt_int8_enable = false; + std::string trt_int8_calibration_table_name = ""; + bool trt_int8_use_native_calibration_table = false; + + #ifdef _MSC_VER + std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string); + #else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; + #endif + std::istringstream ss(ov_string); + std::string token; + while (ss >> token) { + if(token == "") { + continue; + } + auto pos = token.find("|"); + if (pos == std::string::npos || pos == 0 || pos == token.length()) { + ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); + } + + auto key = token.substr(0,pos); + auto value = token.substr(pos+1); + if (key == "has_trt_options") { + if(value == "true" || value == "True"){ + has_trt_options = true; + } else if (value == "false" || value == "False") { + has_trt_options = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. true or false. 
Default value is false.\n"); + } + } else if (key == "trt_max_workspace_size") { + if(!value.empty()) { + trt_max_workspace_size = std::stoull(value); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n"); + } + } else if (key == "trt_fp16_enable") { + if(value == "true" || value == "True"){ + trt_fp16_enable = true; + } else if (value == "false" || value == "False") { + trt_fp16_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "trt_int8_enable") { + if(value == "true" || value == "True"){ + trt_int8_enable = true; + } else if (value == "false" || value == "False") { + trt_int8_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "trt_int8_calibration_table_name") { + if(!value.empty()) { + trt_int8_calibration_table_name = value; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n"); + } + } else if (key == "trt_int8_use_native_calibration_table") { + if(value == "true" || value == "True"){ + trt_int8_use_native_calibration_table = true; + } else if (value == "false" || value == "False") { + trt_int8_use_native_calibration_table = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else { + ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. 
['has_trt_options', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table'] \n"); + } + } + OrtTensorRTProviderOptions tensorrt_options; + tensorrt_options.device_id = 0; + tensorrt_options.has_user_compute_stream = 0; + tensorrt_options.user_compute_stream = nullptr; + tensorrt_options.has_trt_options = has_trt_options; + tensorrt_options.trt_max_workspace_size = trt_max_workspace_size; + tensorrt_options.trt_fp16_enable = trt_fp16_enable; + tensorrt_options.trt_int8_enable = trt_int8_enable; + tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str(); + tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table; + session_options.AppendExecutionProvider_TensorRT(tensorrt_options); + + OrtCUDAProviderOptions cuda_options{ + 0, + static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo), + std::numeric_limits<size_t>::max(), + 0, + !performance_test_config.run_config.do_cuda_copy_in_separate_stream, + 0, + nullptr}; + session_options.AppendExecutionProvider_CUDA(cuda_options); #else ORT_THROW("TensorRT is not supported in this build\n"); #endif diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 897d14ef79..7cb8e4c216 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -43,7 +43,7 @@ std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_arena std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider() { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; if (auto factory = CreateExecutionProviderFactory_Tensorrt(&params)) return factory->CreateProvider(); #endif