mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
add TensorRT configuration to OrtProviderOptions (#6979)
* add TensorRT configurations in provider options * Update ort_test_session.cc * Update tensorrt_execution_provider.cc * Update onnxruntime_pybind_state.cc * Update main.cc
This commit is contained in:
parent
783acb144f
commit
2e38bf5e23
9 changed files with 221 additions and 23 deletions
|
|
@ -289,9 +289,15 @@ typedef struct OrtROCMProviderOptions {
|
|||
/// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT
|
||||
/// </summary>
|
||||
typedef struct OrtTensorRTProviderOptions {
  // Plain C aggregate. Member order is part of the interface: callers fill it
  // positionally, e.g. {0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}.
  int device_id;                                // cuda device id.
  int has_user_compute_stream;                  // indicator of user specified CUDA compute stream.
  void* user_compute_stream;                    // user specified CUDA compute stream.
  // When nonzero, the trt_* members below are used by the provider instead of
  // the corresponding TensorRT environment variables; when zero, the
  // environment variables (if set) are read and the trt_* members are ignored.
  int has_trt_options;                          // override environment variables with following TensorRT settings at runtime.
  size_t trt_max_workspace_size;                // maximum workspace size for TensorRT, in bytes (callers default to 1 << 30).
  int trt_fp16_enable;                          // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
  int trt_int8_enable;                          // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
  // The two INT8 calibration members are only consulted when INT8 is enabled.
  const char* trt_int8_calibration_table_name;  // TensorRT INT8 calibration table name. May be null (treated as an empty name).
  int trt_int8_use_native_calibration_table;    // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
} OrtTensorRTProviderOptions;
|
||||
|
||||
/// <summary>
|
||||
|
|
|
|||
|
|
@ -404,30 +404,50 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
min_subgraph_size_ = std::stoi(min_subgraph_size_env);
|
||||
}
|
||||
|
||||
const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
|
||||
if (!max_workspace_size_env.empty()) {
|
||||
max_workspace_size_ = std::stoull(max_workspace_size_env);
|
||||
if (info.has_trt_options) {
|
||||
max_workspace_size_ = info.max_workspace_size;
|
||||
} else {
|
||||
const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
|
||||
if (!max_workspace_size_env.empty()) {
|
||||
max_workspace_size_ = std::stoull(max_workspace_size_env);
|
||||
}
|
||||
}
|
||||
|
||||
const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
|
||||
if (!fp16_enable_env.empty()) {
|
||||
fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
|
||||
if (info.has_trt_options) {
|
||||
fp16_enable_ = info.fp16_enable;
|
||||
} else {
|
||||
const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
|
||||
if (!fp16_enable_env.empty()) {
|
||||
fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
|
||||
}
|
||||
}
|
||||
|
||||
const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
|
||||
if (!int8_enable_env.empty()) {
|
||||
int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
|
||||
if (info.has_trt_options) {
|
||||
int8_enable_ = info.int8_enable;
|
||||
} else {
|
||||
const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
|
||||
if (!int8_enable_env.empty()) {
|
||||
int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
|
||||
}
|
||||
}
|
||||
|
||||
if (int8_enable_) {
|
||||
const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
|
||||
if (!int8_calibration_cache_name_env.empty()) {
|
||||
int8_calibration_cache_name_ = int8_calibration_cache_name_env;
|
||||
if (info.has_trt_options) {
|
||||
int8_calibration_cache_name_ = info.int8_calibration_table_name;
|
||||
} else {
|
||||
const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
|
||||
if (!int8_calibration_cache_name_env.empty()) {
|
||||
int8_calibration_cache_name_ = int8_calibration_cache_name_env;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
|
||||
if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
|
||||
int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
|
||||
if (info.has_trt_options) {
|
||||
int8_use_native_tensorrt_calibration_table_ = info.int8_use_native_calibration_table;
|
||||
} else {
|
||||
const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
|
||||
if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
|
||||
int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,12 @@ struct TensorrtExecutionProviderInfo {
|
|||
int device_id{0};
|
||||
bool has_user_compute_stream{false};
|
||||
void* user_compute_stream{nullptr};
|
||||
bool has_trt_options{false};
|
||||
size_t max_workspace_size{1 << 30};
|
||||
bool fp16_enable{false};
|
||||
bool int8_enable{false};
|
||||
std::string int8_calibration_table_name{""};
|
||||
bool int8_use_native_calibration_table{false};
|
||||
};
|
||||
|
||||
// Information to construct kernel function state.
|
||||
|
|
|
|||
|
|
@ -49,6 +49,12 @@ struct Tensorrt_Provider : Provider {
|
|||
info.device_id = options.device_id;
|
||||
info.has_user_compute_stream = options.has_user_compute_stream;
|
||||
info.user_compute_stream = options.user_compute_stream;
|
||||
info.has_trt_options = options.has_trt_options;
|
||||
info.max_workspace_size = options.trt_max_workspace_size;
|
||||
info.fp16_enable = options.trt_fp16_enable;
|
||||
info.int8_enable = options.trt_int8_enable;
|
||||
info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name;
|
||||
info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table;
|
||||
return std::make_shared<TensorrtProviderFactory>(info);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -490,7 +490,61 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
|
|||
sess->GetSessionOptions().enable_cpu_mem_arena));
|
||||
} else if (type == kTensorrtExecutionProvider) {
|
||||
#ifdef USE_TENSORRT
|
||||
OrtTensorRTProviderOptions params{0, 0, nullptr};
|
||||
OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
|
||||
std::string trt_int8_calibration_table_name;
|
||||
auto it = provider_options_map.find(type);
|
||||
if (it != provider_options_map.end()) {
|
||||
for (auto option : it->second) {
|
||||
if (option.first == "has_trt_options") {
|
||||
if (option.second == "True" || option.second == "true") {
|
||||
params.has_trt_options = true;
|
||||
} else if (option.second == "False" || option.second == "false") {
|
||||
params.has_trt_options = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
|
||||
}
|
||||
} else if (option.first == "trt_max_workspace_size") {
|
||||
if (!option.second.empty()) {
|
||||
params.trt_max_workspace_size = std::stoull(option.second);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
|
||||
}
|
||||
} else if (option.first == "trt_fp16_enable") {
|
||||
if (option.second == "True" || option.second == "true") {
|
||||
params.trt_fp16_enable = true;
|
||||
} else if (option.second == "False" || option.second == "false") {
|
||||
params.trt_fp16_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
|
||||
}
|
||||
} else if (option.first == "trt_int8_enable") {
|
||||
if (option.second == "True" || option.second == "true") {
|
||||
params.trt_int8_enable = true;
|
||||
} else if (option.second == "False" || option.second == "false") {
|
||||
params.trt_int8_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
|
||||
}
|
||||
} else if (option.first == "trt_int8_calibration_table_name") {
|
||||
if (!option.second.empty()) {
|
||||
trt_int8_calibration_table_name = option.second;
|
||||
params.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name i.e. 'cal_table'.\n");
|
||||
}
|
||||
} else if (option.first == "trt_int8_use_native_calibration_table") {
|
||||
if (option.second == "True" || option.second == "true") {
|
||||
params.trt_int8_use_native_calibration_table = true;
|
||||
} else if (option.second == "False" || option.second == "false") {
|
||||
params.trt_int8_use_native_calibration_table = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 'True' or 'False'. Default value is False.\n");
|
||||
}
|
||||
} else {
|
||||
ORT_THROW("Invalid TensorRT EP option: ", option.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(¶ms));
|
||||
#endif
|
||||
} else if (type == kMIGraphXExecutionProvider) {
|
||||
|
|
|
|||
|
|
@ -312,7 +312,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
OrtTensorRTProviderOptions tensorrt_options{
|
||||
0,
|
||||
0,
|
||||
nullptr};
|
||||
nullptr,
|
||||
0,
|
||||
1 << 30,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
0};
|
||||
|
||||
OrtCUDAProviderOptions cuda_options{
|
||||
0,
|
||||
|
|
|
|||
|
|
@ -62,6 +62,14 @@ namespace perftest {
|
|||
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
|
||||
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
|
||||
"\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
|
||||
"\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
|
||||
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
|
||||
"\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
|
||||
"\t [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n"
|
||||
"\t [TensorRT only] [trt_int8_calibration_table_name]: Specify INT8 calibration table name.\n"
|
||||
"\t [TensorRT only] [trt_int8_use_native_calibration_table]: Use Native TensorRT calibration table.\n"
|
||||
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
|
||||
"\t [Example] [For TensorRT EP] -e tensorrt -i 'use_trt_options|true trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false'\n"
|
||||
"\t-h: help\n");
|
||||
}
|
||||
#ifdef _WIN32
|
||||
|
|
|
|||
|
|
@ -62,8 +62,100 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
#endif
|
||||
} else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
|
||||
#ifdef USE_TENSORRT
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0));
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
|
||||
bool has_trt_options = false;
|
||||
size_t trt_max_workspace_size = 1 << 30;
|
||||
bool trt_fp16_enable = false;
|
||||
bool trt_int8_enable = false;
|
||||
std::string trt_int8_calibration_table_name = "";
|
||||
bool trt_int8_use_native_calibration_table = false;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
|
||||
#else
|
||||
std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
|
||||
#endif
|
||||
std::istringstream ss(ov_string);
|
||||
std::string token;
|
||||
while (ss >> token) {
|
||||
if(token == "") {
|
||||
continue;
|
||||
}
|
||||
auto pos = token.find("|");
|
||||
if (pos == std::string::npos || pos == 0 || pos == token.length()) {
|
||||
ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
|
||||
}
|
||||
|
||||
auto key = token.substr(0,pos);
|
||||
auto value = token.substr(pos+1);
|
||||
if (key == "has_trt_options") {
|
||||
if(value == "true" || value == "True"){
|
||||
has_trt_options = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
has_trt_options = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_max_workspace_size") {
|
||||
if(!value.empty()) {
|
||||
trt_max_workspace_size = std::stoull(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_fp16_enable") {
|
||||
if(value == "true" || value == "True"){
|
||||
trt_fp16_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_fp16_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_enable") {
|
||||
if(value == "true" || value == "True"){
|
||||
trt_int8_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_int8_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_calibration_table_name") {
|
||||
if(!value.empty()) {
|
||||
trt_int8_calibration_table_name = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-emtpy string.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_use_native_calibration_table") {
|
||||
if(value == "true" || value == "True"){
|
||||
trt_int8_use_native_calibration_table = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_int8_use_native_calibration_table = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['use_trt_options', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table'] \n");
|
||||
}
|
||||
}
|
||||
OrtTensorRTProviderOptions tensorrt_options;
|
||||
tensorrt_options.device_id = 0;
|
||||
tensorrt_options.has_user_compute_stream = 0;
|
||||
tensorrt_options.user_compute_stream = nullptr;
|
||||
tensorrt_options.has_trt_options = has_trt_options;
|
||||
tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
|
||||
tensorrt_options.trt_fp16_enable = trt_fp16_enable;
|
||||
tensorrt_options.trt_int8_enable = trt_int8_enable;
|
||||
tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
|
||||
tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
|
||||
session_options.AppendExecutionProvider_TensorRT(tensorrt_options);
|
||||
|
||||
OrtCUDAProviderOptions cuda_options{
|
||||
0,
|
||||
static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo),
|
||||
std::numeric_limits<size_t>::max(),
|
||||
0,
|
||||
!performance_test_config.run_config.do_cuda_copy_in_separate_stream,
|
||||
0,
|
||||
nullptr};
|
||||
session_options.AppendExecutionProvider_CUDA(cuda_options);
|
||||
#else
|
||||
ORT_THROW("TensorRT is not supported in this build\n");
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_aren
|
|||
|
||||
std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider() {
|
||||
#ifdef USE_TENSORRT
|
||||
OrtTensorRTProviderOptions params{0, 0, nullptr};
|
||||
OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
|
||||
if (auto factory = CreateExecutionProviderFactory_Tensorrt(¶ms))
|
||||
return factory->CreateProvider();
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in a new issue