add TensorRT configuration to OrtProviderOptions (#6979)

* add TensorRT configurations in provider options

* Update ort_test_session.cc

* Update tensorrt_execution_provider.cc

* Update onnxruntime_pybind_state.cc

* Update main.cc
This commit is contained in:
stevenlix 2021-03-16 17:16:28 -07:00 committed by GitHub
parent 783acb144f
commit 2e38bf5e23
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 221 additions and 23 deletions

View file

@ -289,9 +289,15 @@ typedef struct OrtROCMProviderOptions {
/// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT
/// </summary>
typedef struct OrtTensorRTProviderOptions {
int device_id;
int has_user_compute_stream;
void* user_compute_stream;
int device_id; // cuda device id.
int has_user_compute_stream; // indicator of user specified CUDA compute stream.
void* user_compute_stream; // user specified CUDA compute stream.
int has_trt_options; // override environment variables with following TensorRT settings at runtime.
size_t trt_max_workspace_size; // maximum workspace size for TensorRT.
int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name.
int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
} OrtTensorRTProviderOptions;
/// <summary>

View file

@ -404,30 +404,50 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
min_subgraph_size_ = std::stoi(min_subgraph_size_env);
}
const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
if (!max_workspace_size_env.empty()) {
max_workspace_size_ = std::stoull(max_workspace_size_env);
if (info.has_trt_options) {
max_workspace_size_ = info.max_workspace_size;
} else {
const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
if (!max_workspace_size_env.empty()) {
max_workspace_size_ = std::stoull(max_workspace_size_env);
}
}
const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
if (!fp16_enable_env.empty()) {
fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
if (info.has_trt_options) {
fp16_enable_ = info.fp16_enable;
} else {
const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
if (!fp16_enable_env.empty()) {
fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
}
}
const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
if (!int8_enable_env.empty()) {
int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
if (info.has_trt_options) {
int8_enable_ = info.int8_enable;
} else {
const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
if (!int8_enable_env.empty()) {
int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
}
}
if (int8_enable_) {
const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
if (!int8_calibration_cache_name_env.empty()) {
int8_calibration_cache_name_ = int8_calibration_cache_name_env;
if (info.has_trt_options) {
int8_calibration_cache_name_ = info.int8_calibration_table_name;
} else {
const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
if (!int8_calibration_cache_name_env.empty()) {
int8_calibration_cache_name_ = int8_calibration_cache_name_env;
}
}
const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
if (info.has_trt_options) {
int8_use_native_tensorrt_calibration_table_ = info.int8_use_native_calibration_table;
} else {
const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
}
}
}

View file

@ -71,6 +71,12 @@ struct TensorrtExecutionProviderInfo {
int device_id{0};
bool has_user_compute_stream{false};
void* user_compute_stream{nullptr};
bool has_trt_options{false};
size_t max_workspace_size{1 << 30};
bool fp16_enable{false};
bool int8_enable{false};
std::string int8_calibration_table_name{""};
bool int8_use_native_calibration_table{false};
};
// Information to construct kernel function state.

View file

@ -49,6 +49,12 @@ struct Tensorrt_Provider : Provider {
info.device_id = options.device_id;
info.has_user_compute_stream = options.has_user_compute_stream;
info.user_compute_stream = options.user_compute_stream;
info.has_trt_options = options.has_trt_options;
info.max_workspace_size = options.trt_max_workspace_size;
info.fp16_enable = options.trt_fp16_enable;
info.int8_enable = options.trt_int8_enable;
info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name;
info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table;
return std::make_shared<TensorrtProviderFactory>(info);
}

View file

@ -490,7 +490,61 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
sess->GetSessionOptions().enable_cpu_mem_arena));
} else if (type == kTensorrtExecutionProvider) {
#ifdef USE_TENSORRT
OrtTensorRTProviderOptions params{0, 0, nullptr};
OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
std::string trt_int8_calibration_table_name;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
for (auto option : it->second) {
if (option.first == "has_trt_options") {
if (option.second == "True" || option.second == "true") {
params.has_trt_options = true;
} else if (option.second == "False" || option.second == "false") {
params.has_trt_options = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_max_workspace_size") {
if (!option.second.empty()) {
params.trt_max_workspace_size = std::stoull(option.second);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
}
} else if (option.first == "trt_fp16_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_fp16_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_fp16_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_enable") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_enable = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else if (option.first == "trt_int8_calibration_table_name") {
if (!option.second.empty()) {
trt_int8_calibration_table_name = option.second;
params.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
}
} else if (option.first == "trt_int8_use_native_calibration_table") {
if (option.second == "True" || option.second == "true") {
params.trt_int8_use_native_calibration_table = true;
} else if (option.second == "False" || option.second == "false") {
params.trt_int8_use_native_calibration_table = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
}
} else {
ORT_THROW("Invalid TensorRT EP option: ", option.first);
}
}
}
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params));
#endif
} else if (type == kMIGraphXExecutionProvider) {

View file

@ -312,7 +312,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
OrtTensorRTProviderOptions tensorrt_options{
0,
0,
nullptr};
nullptr,
0,
1 << 30,
0,
0,
nullptr,
0};
OrtCUDAProviderOptions cuda_options{
0,

View file

@ -62,6 +62,14 @@ namespace perftest {
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
"\t [TensorRT only] [has_trt_options]: Overrides TensorRT environment variables (if any) with the following settings at runtime.\n"
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
"\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
"\t [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n"
"\t [TensorRT only] [trt_int8_calibration_table_name]: Specify INT8 calibration table name.\n"
"\t [TensorRT only] [trt_int8_use_native_calibration_table]: Use Native TensorRT calibration table.\n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For TensorRT EP] -e tensorrt -i 'has_trt_options|true trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false'\n"
"\t-h: help\n");
}
#ifdef _WIN32

View file

@ -62,8 +62,100 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
#endif
} else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
#ifdef USE_TENSORRT
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0));
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
bool has_trt_options = false;
size_t trt_max_workspace_size = 1 << 30;
bool trt_fp16_enable = false;
bool trt_int8_enable = false;
std::string trt_int8_calibration_table_name = "";
bool trt_int8_use_native_calibration_table = false;
#ifdef _MSC_VER
std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
#else
std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
#endif
std::istringstream ss(ov_string);
std::string token;
while (ss >> token) {
if(token == "") {
continue;
}
auto pos = token.find("|");
if (pos == std::string::npos || pos == 0 || pos == token.length()) {
ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
}
auto key = token.substr(0,pos);
auto value = token.substr(pos+1);
if (key == "has_trt_options") {
if(value == "true" || value == "True"){
has_trt_options = true;
} else if (value == "false" || value == "False") {
has_trt_options = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_max_workspace_size") {
if(!value.empty()) {
trt_max_workspace_size = std::stoull(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
}
} else if (key == "trt_fp16_enable") {
if(value == "true" || value == "True"){
trt_fp16_enable = true;
} else if (value == "false" || value == "False") {
trt_fp16_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_int8_enable") {
if(value == "true" || value == "True"){
trt_int8_enable = true;
} else if (value == "false" || value == "False") {
trt_int8_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_int8_calibration_table_name") {
if(!value.empty()) {
trt_int8_calibration_table_name = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
}
} else if (key == "trt_int8_use_native_calibration_table") {
if(value == "true" || value == "True"){
trt_int8_use_native_calibration_table = true;
} else if (value == "false" || value == "False") {
trt_int8_use_native_calibration_table = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
}
} else {
ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['has_trt_options', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table'] \n");
}
}
OrtTensorRTProviderOptions tensorrt_options;
tensorrt_options.device_id = 0;
tensorrt_options.has_user_compute_stream = 0;
tensorrt_options.user_compute_stream = nullptr;
tensorrt_options.has_trt_options = has_trt_options;
tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
tensorrt_options.trt_fp16_enable = trt_fp16_enable;
tensorrt_options.trt_int8_enable = trt_int8_enable;
tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
session_options.AppendExecutionProvider_TensorRT(tensorrt_options);
OrtCUDAProviderOptions cuda_options{
0,
static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo),
std::numeric_limits<size_t>::max(),
0,
!performance_test_config.run_config.do_cuda_copy_in_separate_stream,
0,
nullptr};
session_options.AppendExecutionProvider_CUDA(cuda_options);
#else
ORT_THROW("TensorRT is not supported in this build\n");
#endif

View file

@ -43,7 +43,7 @@ std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_aren
std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider() {
#ifdef USE_TENSORRT
OrtTensorRTProviderOptions params{0, 0, nullptr};
OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
if (auto factory = CreateExecutionProviderFactory_Tensorrt(&params))
return factory->CreateProvider();
#endif