mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-02 03:55:34 +00:00
Enable global TRT timing cache (#17865)
This change adds a new `trt_timing_cache_path` option. Internally it is stored as `global_cache_path_` and is resolved with a fall-through approach: 1. no path provided => working directory 2. `trt_engine_cache_path` provided but no `trt_timing_cache_path` => `trt_engine_cache_path` 3. `trt_timing_cache_path` provided => `trt_timing_cache_path` (if `trt_engine_cache_path` is not provided, the engine cache still defaults to the working directory) ### Motivation and Context A TRT timing cache can be reused across multiple models because it only holds kernel timings, and it is common for network "patterns" to be reused between models. Sharing the cache can shorten engine build times considerably. --------- Co-authored-by: Carson M <carson@pyke.io>
This commit is contained in:
parent
58f1d15d19
commit
2eeafc37bc
10 changed files with 138 additions and 478 deletions
|
|
@ -25,13 +25,14 @@ struct OrtTensorRTProviderOptionsV2 {
|
|||
int trt_dla_core{0}; // DLA core number. Default 0
|
||||
int trt_dump_subgraphs{0}; // dump TRT subgraph. Default 0 = false, nonzero = true
|
||||
int trt_engine_cache_enable{0}; // enable engine caching. Default 0 = false, nonzero = true
|
||||
const char* trt_engine_cache_path{nullptr}; // specify engine cache path
|
||||
const char* trt_engine_cache_path{nullptr}; // specify engine cache path, defaults to the working directory
|
||||
int trt_engine_decryption_enable{0}; // enable engine decryption. Default 0 = false, nonzero = true
|
||||
const char* trt_engine_decryption_lib_path{nullptr}; // specify engine decryption library path
|
||||
int trt_force_sequential_engine_build{0}; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
|
||||
int trt_context_memory_sharing_enable{0}; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
|
||||
int trt_layer_norm_fp32_fallback{0}; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
|
||||
int trt_timing_cache_enable{0}; // enable TensorRT timing cache. Default 0 = false, nonzero = true
|
||||
const char* trt_timing_cache_path{nullptr}; // specify timing cache path, if none is provided the trt_engine_cache_path is used
|
||||
int trt_force_timing_cache{0}; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
|
||||
int trt_detailed_build_log{0}; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
|
||||
int trt_build_heuristics_enable{0}; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
|
||||
|
|
|
|||
|
|
@ -824,6 +824,14 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) {
|
||||
cache_path_ = info.engine_cache_path;
|
||||
}
|
||||
// use a more global cache if given
|
||||
if (timing_cache_enable_) {
|
||||
if (!info.timing_cache_path.empty()) {
|
||||
global_cache_path_ = info.timing_cache_path;
|
||||
} else {
|
||||
global_cache_path_ = cache_path_;
|
||||
}
|
||||
}
|
||||
engine_decryption_enable_ = info.engine_decryption_enable;
|
||||
if (engine_decryption_enable_) {
|
||||
engine_decryption_lib_path_ = info.engine_decryption_lib_path;
|
||||
|
|
@ -928,6 +936,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path";
|
||||
}
|
||||
}
|
||||
if (timing_cache_enable_) {
|
||||
std::string timing_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCachePath);
|
||||
// use a more global cache if given
|
||||
if (!timing_cache_path.empty()) {
|
||||
global_cache_path_ = timing_cache_path;
|
||||
} else {
|
||||
global_cache_path_ = cache_path_;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
|
||||
if (!engine_decryption_enable_env.empty()) {
|
||||
|
|
@ -1019,6 +1036,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
throw std::runtime_error("Failed to create directory " + cache_path_);
|
||||
}
|
||||
}
|
||||
if (!global_cache_path_.empty() && !fs::is_directory(global_cache_path_)) {
|
||||
if (!fs::create_directory(global_cache_path_)) {
|
||||
throw std::runtime_error("Failed to create directory " + global_cache_path_);
|
||||
}
|
||||
}
|
||||
{
|
||||
auto lock = GetApiLock();
|
||||
runtime_ = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger()));
|
||||
|
|
@ -1104,6 +1126,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
<< ", trt_dump_subgraphs: " << dump_subgraphs_
|
||||
<< ", trt_engine_cache_enable: " << engine_cache_enable_
|
||||
<< ", trt_cache_path: " << cache_path_
|
||||
<< ", trt_global_cache_path: " << global_cache_path_
|
||||
<< ", trt_engine_decryption_enable: " << engine_decryption_enable_
|
||||
<< ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
|
||||
<< ", trt_force_sequential_engine_build: " << force_sequential_engine_build_
|
||||
|
|
@ -2199,7 +2222,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
std::string timing_cache_path = "";
|
||||
bool engine_update = false;
|
||||
if (timing_cache_enable_) {
|
||||
timing_cache_path = GetTimingCachePath(cache_path_, prop);
|
||||
timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
|
||||
}
|
||||
{
|
||||
// ifstream file check, engine serialization/deserialization and engine build are in critical section. It needs lock protection to prevent race condition when inferencing with multithreading.
|
||||
|
|
@ -2398,7 +2421,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
|
||||
runtime_.get(), profiles_[context->node_name], context_memory_sharing_enable_, &max_ctx_mem_size_,
|
||||
dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_,
|
||||
force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
|
||||
global_cache_path_, force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
|
||||
builder_optimization_level_, auxiliary_streams_, !tactic_sources_.empty(), tactics};
|
||||
*state = p.release();
|
||||
return 0;
|
||||
|
|
@ -2460,7 +2483,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
const std::string profile_cache_path = cache_path + "_sm" + compute_capability + ".profile";
|
||||
std::string timing_cache_path = "";
|
||||
if (timing_cache_enable_) {
|
||||
timing_cache_path = GetTimingCachePath(cache_path_, prop);
|
||||
timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
|
||||
}
|
||||
|
||||
// Load serialized engine
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@ static const std::string kDLACore = "ORT_TENSORRT_DLA_CORE";
|
|||
static const std::string kDumpSubgraphs = "ORT_TENSORRT_DUMP_SUBGRAPHS";
|
||||
static const std::string kEngineCacheEnable = "ORT_TENSORRT_ENGINE_CACHE_ENABLE";
|
||||
static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH";
|
||||
// As a timing cache can be used across multiple ONNX files, it makes sense to have a separate cache path
|
||||
static const std::string kTimingCachePath = "ORT_TENSORRT_GLOBAL_CACHE_PATH";
|
||||
static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE";
|
||||
static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH";
|
||||
static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
|
||||
|
|
@ -131,6 +133,7 @@ struct TensorrtFuncState {
|
|||
int (*engine_decryption)(const char*, char*, size_t*) = nullptr;
|
||||
int (*engine_encryption)(const char*, char*, size_t) = nullptr;
|
||||
bool timing_cache_enable = true;
|
||||
std::string timing_cache_path;
|
||||
bool force_timing_cache = false;
|
||||
bool detailed_build_log = false;
|
||||
bool build_heuristics_enable = false;
|
||||
|
|
@ -218,7 +221,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
|
|||
int builder_optimization_level_ = 3;
|
||||
int auxiliary_streams_ = -1;
|
||||
std::string tactic_sources_;
|
||||
std::string cache_path_, engine_decryption_lib_path_;
|
||||
std::string global_cache_path_, cache_path_, engine_decryption_lib_path_;
|
||||
std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
|
||||
OrtMutex tensorrt_mu_;
|
||||
int device_id_;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ constexpr const char* kDLAEnable = "trt_dla_enable";
|
|||
constexpr const char* kDLACore = "trt_dla_core";
|
||||
constexpr const char* kDumpSubgraphs = "trt_dump_subgraphs";
|
||||
constexpr const char* kEngineCacheEnable = "trt_engine_cache_enable";
|
||||
constexpr const char* kCachePath = "trt_engine_cache_path";
|
||||
constexpr const char* kEngineCachePath = "trt_engine_cache_path";
|
||||
constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable";
|
||||
constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path";
|
||||
constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build";
|
||||
|
|
@ -33,7 +33,8 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine
|
|||
constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable";
|
||||
constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback";
|
||||
constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable";
|
||||
constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match";
|
||||
constexpr const char* kTimingCachePath = "trt_timing_cache_path";
|
||||
constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache";
|
||||
constexpr const char* kDetailedBuildLog = "trt_detailed_build_log";
|
||||
constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable";
|
||||
constexpr const char* kSparsityEnable = "trt_sparsity_enable";
|
||||
|
|
@ -76,13 +77,14 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
|
|||
.AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kEngineCachePath, info.engine_cache_path)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kTimingCachePath, info.timing_cache_path)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable)
|
||||
|
|
@ -115,7 +117,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
|
|||
{tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.dla_core)},
|
||||
{tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
|
||||
{tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.engine_cache_enable)},
|
||||
{tensorrt::provider_option_names::kCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
|
||||
{tensorrt::provider_option_names::kEngineCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
|
||||
{tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)},
|
||||
{tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)},
|
||||
{tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)},
|
||||
|
|
@ -123,6 +125,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
|
|||
{tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)},
|
||||
{tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)},
|
||||
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)},
|
||||
{tensorrt::provider_option_names::kTimingCachePath, MakeStringWithClassicLocale(info.timing_cache_path)},
|
||||
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)},
|
||||
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
|
||||
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)},
|
||||
|
|
@ -142,7 +145,8 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
|
|||
ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) {
|
||||
auto empty_if_null = [](const char* s) { return s != nullptr ? std::string{s} : std::string{}; };
|
||||
const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name);
|
||||
const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path);
|
||||
const std::string kEngineCachePath_ = empty_if_null(info.trt_engine_cache_path);
|
||||
const std::string kTimingCachePath_ = empty_if_null(info.trt_timing_cache_path);
|
||||
const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources);
|
||||
const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path);
|
||||
const std::string kExtraPluginLibPaths_ = empty_if_null(info.trt_extra_plugin_lib_paths);
|
||||
|
|
@ -164,13 +168,14 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
|
|||
{tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.trt_dla_core)},
|
||||
{tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.trt_dump_subgraphs)},
|
||||
{tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.trt_engine_cache_enable)},
|
||||
{tensorrt::provider_option_names::kCachePath, kCachePath_},
|
||||
{tensorrt::provider_option_names::kEngineCachePath, kEngineCachePath_},
|
||||
{tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)},
|
||||
{tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_},
|
||||
{tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)},
|
||||
{tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)},
|
||||
{tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)},
|
||||
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)},
|
||||
{tensorrt::provider_option_names::kTimingCachePath, kTimingCachePath_},
|
||||
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)},
|
||||
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)},
|
||||
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)},
|
||||
|
|
@ -204,6 +209,27 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
|
|||
if (provider_options == nullptr) {
|
||||
return;
|
||||
}
|
||||
auto copy_string_if_needed = [&](std::string& s_in) {
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = s_in.size();
|
||||
if (str_size == 0) {
|
||||
return (const char*)nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, s_in.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, s_in.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
return (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
return s_in.c_str();
|
||||
}
|
||||
};
|
||||
|
||||
TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options);
|
||||
auto& trt_provider_options_v2 = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options);
|
||||
trt_provider_options_v2.device_id = internal_options.device_id;
|
||||
|
|
@ -220,24 +246,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
|
|||
trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable;
|
||||
trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable;
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.int8_calibration_table_name.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_int8_calibration_table_name = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str();
|
||||
}
|
||||
trt_provider_options_v2.trt_int8_calibration_table_name = copy_string_if_needed(internal_options.int8_calibration_table_name);
|
||||
|
||||
trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
|
||||
trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable;
|
||||
|
|
@ -245,45 +254,12 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
|
|||
trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs;
|
||||
trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable;
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.engine_cache_path.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_engine_cache_path = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.engine_cache_path.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_engine_cache_path = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str();
|
||||
}
|
||||
trt_provider_options_v2.trt_engine_cache_path = copy_string_if_needed(internal_options.engine_cache_path);
|
||||
trt_provider_options_v2.trt_timing_cache_path = copy_string_if_needed(internal_options.timing_cache_path);
|
||||
|
||||
trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable;
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.engine_decryption_lib_path.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str();
|
||||
}
|
||||
trt_provider_options_v2.trt_engine_decryption_lib_path = copy_string_if_needed(internal_options.engine_decryption_lib_path);
|
||||
|
||||
trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build;
|
||||
trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable;
|
||||
|
|
@ -296,100 +272,11 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
|
|||
trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level;
|
||||
trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams;
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.tactic_sources.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_tactic_sources = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.tactic_sources.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_tactic_sources = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str();
|
||||
}
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.extra_plugin_lib_paths.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str();
|
||||
}
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.profile_min_shapes.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_profile_min_shapes = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str();
|
||||
}
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.profile_max_shapes.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_profile_max_shapes = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str();
|
||||
}
|
||||
|
||||
if (string_copy) {
|
||||
char* dest = nullptr;
|
||||
auto str_size = internal_options.profile_opt_shapes.size();
|
||||
if (str_size == 0) {
|
||||
trt_provider_options_v2.trt_profile_opt_shapes = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest;
|
||||
}
|
||||
} else {
|
||||
trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str();
|
||||
}
|
||||
trt_provider_options_v2.trt_tactic_sources = copy_string_if_needed(internal_options.tactic_sources);
|
||||
trt_provider_options_v2.trt_extra_plugin_lib_paths = copy_string_if_needed(internal_options.extra_plugin_lib_paths);
|
||||
trt_provider_options_v2.trt_profile_min_shapes = copy_string_if_needed(internal_options.profile_min_shapes);
|
||||
trt_provider_options_v2.trt_profile_max_shapes = copy_string_if_needed(internal_options.profile_max_shapes);
|
||||
trt_provider_options_v2.trt_profile_opt_shapes = copy_string_if_needed(internal_options.profile_opt_shapes);
|
||||
|
||||
trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ struct TensorrtExecutionProviderInfo {
|
|||
bool context_memory_sharing_enable{false};
|
||||
bool layer_norm_fp32_fallback{false};
|
||||
bool timing_cache_enable{false};
|
||||
std::string timing_cache_path{""};
|
||||
bool force_timing_cache{false};
|
||||
bool detailed_build_log{false};
|
||||
bool build_heuristics_enable{false};
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ struct Tensorrt_Provider : Provider {
|
|||
info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0;
|
||||
info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0;
|
||||
info.timing_cache_enable = options.trt_timing_cache_enable != 0;
|
||||
info.timing_cache_path = options.trt_timing_cache_path == nullptr ? "" : options.trt_timing_cache_path;
|
||||
info.force_timing_cache = options.trt_force_timing_cache != 0;
|
||||
info.detailed_build_log = options.trt_detailed_build_log != 0;
|
||||
info.build_heuristics_enable = options.trt_build_heuristics_enable != 0;
|
||||
|
|
|
|||
|
|
@ -1931,6 +1931,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
|
|||
if (ptr != nullptr) {
|
||||
delete[] ptr->trt_int8_calibration_table_name;
|
||||
delete[] ptr->trt_engine_cache_path;
|
||||
delete[] ptr->trt_timing_cache_path;
|
||||
delete[] ptr->trt_engine_decryption_lib_path;
|
||||
delete[] ptr->trt_tactic_sources;
|
||||
delete[] ptr->trt_extra_plugin_lib_paths;
|
||||
|
|
|
|||
|
|
@ -479,7 +479,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
|
|||
// So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance.
|
||||
// (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance
|
||||
// and TRT EP instance, so it won't be released.)
|
||||
std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
|
||||
std::string calibration_table, cache_path, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
|
||||
auto it = provider_options_map.find(type);
|
||||
if (it != provider_options_map.end()) {
|
||||
OrtTensorRTProviderOptionsV2 params;
|
||||
|
|
@ -623,6 +623,13 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
|
|||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be 'True' or 'False'. Default value is 'False'.\n");
|
||||
}
|
||||
} else if (option.first == "trt_timing_cache_path") {
|
||||
if (!option.second.empty()) {
|
||||
timing_cache_path = option.second;
|
||||
params.trt_timing_cache_path = timing_cache_path.c_str();
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_path' should be a path string i.e. 'cache_folder/'.\n");
|
||||
}
|
||||
} else if (option.first == "trt_force_timing_cache") {
|
||||
if (option.second == "True" || option.second == "true") {
|
||||
params.trt_force_timing_cache = true;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <set>
|
||||
#include <list>
|
||||
#include <type_traits>
|
||||
#include <core/session/onnxruntime_cxx_api.h>
|
||||
#include "core/session/onnxruntime_session_options_config_keys.h"
|
||||
|
|
@ -100,36 +101,28 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
const auto& api = Ort::GetApi();
|
||||
OrtCUDAProviderOptionsV2* cuda_options;
|
||||
Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
|
||||
|
||||
const char* cudnn_conv_algo_search = "cudnn_conv_algo_search";
|
||||
const char* default_conv = "DEFAULT";
|
||||
const char* benchmarking = "EXHAUSTIVE";
|
||||
const char* heuristic = "HEURISTIC";
|
||||
std::vector<const char*> option_keys, option_values;
|
||||
// used to keep all option keys and value strings alive
|
||||
std::list<std::string> buffer;
|
||||
buffer.emplace_back("cudnn_conv_algo_search");
|
||||
option_keys.push_back(buffer.back().c_str());
|
||||
switch (performance_test_config.run_config.cudnn_conv_algo) {
|
||||
case 0:
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &benchmarking, 1));
|
||||
buffer.emplace_back("EXHAUSTIVE");
|
||||
break;
|
||||
case 1:
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &heuristic, 1));
|
||||
buffer.emplace_back("HEURISTIC");
|
||||
break;
|
||||
default:
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &default_conv, 1));
|
||||
buffer.emplace_back("DEFAULT");
|
||||
break;
|
||||
}
|
||||
option_values.push_back(buffer.back().c_str());
|
||||
|
||||
const char* do_copy_in_default_stream = "do_copy_in_default_stream";
|
||||
if (performance_test_config.run_config.do_cuda_copy_in_separate_stream) {
|
||||
const char* v = "1";
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
|
||||
} else {
|
||||
const char* v = "0";
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
|
||||
}
|
||||
buffer.emplace_back("do_copy_in_default_stream");
|
||||
option_keys.push_back(buffer.back().c_str());
|
||||
buffer.emplace_back(performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0");
|
||||
option_values.push_back(buffer.back().c_str());
|
||||
|
||||
#ifdef _MSC_VER
|
||||
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
|
||||
|
|
@ -148,51 +141,34 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
"[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
|
||||
}
|
||||
|
||||
auto key = token.substr(0, pos);
|
||||
auto value = token.substr(pos + 1);
|
||||
auto key_p = key.c_str();
|
||||
auto value_p = value.c_str();
|
||||
Ort::ThrowOnError(
|
||||
api.UpdateCUDAProviderOptions(cuda_options, &key_p, &value_p, 1));
|
||||
buffer.emplace_back(token.substr(0, pos));
|
||||
option_keys.push_back(buffer.back().c_str());
|
||||
buffer.emplace_back(token.substr(pos + 1));
|
||||
option_values.push_back(buffer.back().c_str());
|
||||
}
|
||||
|
||||
Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options,
|
||||
option_keys.data(), option_values.data(), option_keys.size()));
|
||||
if (!status.IsOK()) {
|
||||
OrtAllocator* allocator;
|
||||
char* options;
|
||||
Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
|
||||
Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options));
|
||||
ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
|
||||
"\nSupported options are:\n", options);
|
||||
}
|
||||
session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
|
||||
#else
|
||||
ORT_THROW("CUDA is not supported in this build\n");
|
||||
#endif
|
||||
} else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
|
||||
#ifdef USE_TENSORRT
|
||||
int device_id = 0;
|
||||
int trt_max_partition_iterations = 1000;
|
||||
int trt_min_subgraph_size = 1;
|
||||
size_t trt_max_workspace_size = 1 << 30;
|
||||
bool trt_fp16_enable = false;
|
||||
bool trt_int8_enable = false;
|
||||
std::string trt_int8_calibration_table_name = "";
|
||||
bool trt_int8_use_native_calibration_table = false;
|
||||
bool trt_dla_enable = false;
|
||||
int trt_dla_core = 0;
|
||||
bool trt_dump_subgraphs = false;
|
||||
bool trt_engine_cache_enable = false;
|
||||
std::string trt_engine_cache_path = "";
|
||||
bool trt_engine_decryption_enable = false;
|
||||
std::string trt_engine_decryption_lib_path = "";
|
||||
bool trt_force_sequential_engine_build = false;
|
||||
bool trt_context_memory_sharing_enable = false;
|
||||
bool trt_layer_norm_fp32_fallback = false;
|
||||
bool trt_timing_cache_enable = false;
|
||||
bool trt_force_timing_cache = false;
|
||||
bool trt_detailed_build_log = false;
|
||||
bool trt_build_heuristics_enable = false;
|
||||
bool trt_sparsity_enable = false;
|
||||
int trt_builder_optimization_level = 3;
|
||||
int trt_auxiliary_streams = -1;
|
||||
std::string trt_tactic_sources = "";
|
||||
std::string trt_extra_plugin_lib_paths = "";
|
||||
std::string trt_profile_min_shapes = "";
|
||||
std::string trt_profile_max_shapes = "";
|
||||
std::string trt_profile_opt_shapes = "";
|
||||
bool trt_cuda_graph_enable = false;
|
||||
const auto& api = Ort::GetApi();
|
||||
OrtTensorRTProviderOptionsV2* tensorrt_options;
|
||||
Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
|
||||
std::vector<const char*> option_keys, option_values;
|
||||
// used to keep all option keys and value strings alive
|
||||
std::list<std::string> buffer;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
|
||||
|
|
@ -207,272 +183,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
}
|
||||
auto pos = token.find("|");
|
||||
if (pos == std::string::npos || pos == 0 || pos == token.length()) {
|
||||
ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
|
||||
ORT_THROW(
|
||||
"[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
|
||||
}
|
||||
|
||||
auto key = token.substr(0, pos);
|
||||
auto value = token.substr(pos + 1);
|
||||
if (key == "device_id") {
|
||||
if (!value.empty()) {
|
||||
device_id = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_max_partition_iterations") {
|
||||
if (!value.empty()) {
|
||||
trt_max_partition_iterations = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_min_subgraph_size") {
|
||||
if (!value.empty()) {
|
||||
trt_min_subgraph_size = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_max_workspace_size") {
|
||||
if (!value.empty()) {
|
||||
trt_max_workspace_size = std::stoull(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_fp16_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_fp16_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_fp16_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_int8_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_int8_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_calibration_table_name") {
|
||||
if (!value.empty()) {
|
||||
trt_int8_calibration_table_name = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_int8_use_native_calibration_table") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_int8_use_native_calibration_table = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_int8_use_native_calibration_table = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_dla_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_dla_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_dla_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_dla_core") {
|
||||
if (!value.empty()) {
|
||||
trt_dla_core = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_dump_subgraphs") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_dump_subgraphs = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_dump_subgraphs = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_engine_cache_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_engine_cache_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_engine_cache_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_engine_cache_path") {
|
||||
if (!value.empty()) {
|
||||
trt_engine_cache_path = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_engine_decryption_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_engine_decryption_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_engine_decryption_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_engine_decryption_lib_path") {
|
||||
if (!value.empty()) {
|
||||
trt_engine_decryption_lib_path = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_force_sequential_engine_build") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_force_sequential_engine_build = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_force_sequential_engine_build = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_context_memory_sharing_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_context_memory_sharing_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_context_memory_sharing_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_context_memory_sharing_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_layer_norm_fp32_fallback") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_layer_norm_fp32_fallback = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_layer_norm_fp32_fallback = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_layer_norm_fp32_fallback' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_timing_cache_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_timing_cache_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_timing_cache_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_force_timing_cache") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_force_timing_cache = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_force_timing_cache = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_detailed_build_log") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_detailed_build_log = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_detailed_build_log = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_build_heuristics_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_build_heuristics_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_build_heuristics_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_sparsity_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_sparsity_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_sparsity_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_builder_optimization_level") {
|
||||
if (!value.empty()) {
|
||||
trt_builder_optimization_level = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n");
|
||||
}
|
||||
} else if (key == "trt_auxiliary_streams") {
|
||||
if (!value.empty()) {
|
||||
trt_auxiliary_streams = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_tactic_sources") {
|
||||
if (!value.empty()) {
|
||||
trt_tactic_sources = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_extra_plugin_lib_paths") {
|
||||
if (!value.empty()) {
|
||||
trt_extra_plugin_lib_paths = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_profile_min_shapes") {
|
||||
if (!value.empty()) {
|
||||
trt_profile_min_shapes = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_min_shapes' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_profile_max_shapes") {
|
||||
if (!value.empty()) {
|
||||
trt_profile_max_shapes = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_max_shapes' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_profile_opt_shapes") {
|
||||
if (!value.empty()) {
|
||||
trt_profile_opt_shapes = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_opt_shapes' should be a non-empty string.\n");
|
||||
}
|
||||
} else if (key == "trt_cuda_graph_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_cuda_graph_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_cuda_graph_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_cuda_graph_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback', 'trt_timing_cache_enable', 'trt_force_timing_cache', 'trt_detailed_build_log', 'trt_build_heuristics_enable', 'trt_sparsity_enable', 'trt_builder_optimization_level', 'trt_auxiliary_streams', 'trt_tactic_sources', 'trt_extra_plugin_lib_paths', 'trt_profile_min_shapes', 'trt_profile_max_shapes', 'trt_profile_opt_shapes', 'trt_cuda_graph_enable'] \n");
|
||||
}
|
||||
buffer.emplace_back(token.substr(0, pos));
|
||||
option_keys.push_back(buffer.back().c_str());
|
||||
buffer.emplace_back(token.substr(pos + 1));
|
||||
option_values.push_back(buffer.back().c_str());
|
||||
}
|
||||
OrtTensorRTProviderOptionsV2 tensorrt_options;
|
||||
tensorrt_options.device_id = device_id;
|
||||
tensorrt_options.has_user_compute_stream = 0;
|
||||
tensorrt_options.user_compute_stream = nullptr;
|
||||
tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations;
|
||||
tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size;
|
||||
tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
|
||||
tensorrt_options.trt_fp16_enable = trt_fp16_enable;
|
||||
tensorrt_options.trt_int8_enable = trt_int8_enable;
|
||||
tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
|
||||
tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
|
||||
tensorrt_options.trt_dla_enable = trt_dla_enable;
|
||||
tensorrt_options.trt_dla_core = trt_dla_core;
|
||||
tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs;
|
||||
tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable;
|
||||
tensorrt_options.trt_engine_cache_path = trt_engine_cache_path.c_str();
|
||||
tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable;
|
||||
tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str();
|
||||
tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build;
|
||||
tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable;
|
||||
tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback;
|
||||
tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable;
|
||||
tensorrt_options.trt_force_timing_cache = trt_force_timing_cache;
|
||||
tensorrt_options.trt_detailed_build_log = trt_detailed_build_log;
|
||||
tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable;
|
||||
tensorrt_options.trt_sparsity_enable = trt_sparsity_enable;
|
||||
tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level;
|
||||
tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams;
|
||||
tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str();
|
||||
tensorrt_options.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str();
|
||||
tensorrt_options.trt_profile_min_shapes = trt_profile_min_shapes.c_str();
|
||||
tensorrt_options.trt_profile_max_shapes = trt_profile_max_shapes.c_str();
|
||||
tensorrt_options.trt_profile_opt_shapes = trt_profile_opt_shapes.c_str();
|
||||
tensorrt_options.trt_cuda_graph_enable = trt_cuda_graph_enable;
|
||||
|
||||
session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options);
|
||||
Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options,
|
||||
option_keys.data(), option_values.data(), option_keys.size()));
|
||||
if (!status.IsOK()) {
|
||||
OrtAllocator* allocator;
|
||||
char* options;
|
||||
Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
|
||||
Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options));
|
||||
ORT_THROW("[ERROR] [TensorRT] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
|
||||
"\nSupported options are:\n", options);
|
||||
}
|
||||
|
||||
session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);
|
||||
|
||||
OrtCUDAProviderOptions cuda_options;
|
||||
cuda_options.device_id = device_id;
|
||||
cuda_options.device_id = tensorrt_options->device_id;
|
||||
cuda_options.cudnn_conv_algo_search = static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo);
|
||||
cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
|
||||
// TODO: Support arena configuration for users of perf test
|
||||
|
|
|
|||
|
|
@ -590,6 +590,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
|
|||
// uint64_t compilation_without_cache_ms, compilation_with_cache_ms;
|
||||
|
||||
// First session is created with TRT EP with timing cache enabled
|
||||
// Not specifying a trt_timing_cache_path will result in using the working directory
|
||||
params.trt_timing_cache_enable = 1;
|
||||
{
|
||||
// auto start = chrono::steady_clock::now();
|
||||
|
|
|
|||
Loading…
Reference in a new issue