Enable global TRT timing cache (#17865)

This adds a new `trt_timing_cache_path` option. Internally it is
stored as `global_cache_path_` and is resolved using the following
fallback order:
1. no path provided => workdir
2. `trt_engine_cache_path` provided but no `trt_timing_cache_path` =>
`trt_engine_cache_path`
3. `trt_timing_cache_path` provided => `trt_timing_cache_path` (if
`trt_engine_cache_path` is not provided, the engine cache still defaults to the workdir)

### Motivation and Context

A TRT timing cache can be reused across multiple models because it only holds
kernel timings, and it is common for network "patterns" to be reused. This
can significantly reduce build times.

---------

Co-authored-by: Carson M <carson@pyke.io>
This commit is contained in:
Maximilian Müller 2023-10-27 18:23:19 +02:00 committed by GitHub
parent 58f1d15d19
commit 2eeafc37bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 138 additions and 478 deletions

View file

@ -25,13 +25,14 @@ struct OrtTensorRTProviderOptionsV2 {
int trt_dla_core{0}; // DLA core number. Default 0
int trt_dump_subgraphs{0}; // dump TRT subgraph. Default 0 = false, nonzero = true
int trt_engine_cache_enable{0}; // enable engine caching. Default 0 = false, nonzero = true
const char* trt_engine_cache_path{nullptr}; // specify engine cache path
const char* trt_engine_cache_path{nullptr}; // specify engine cache path, defaults to the working directory
int trt_engine_decryption_enable{0}; // enable engine decryption. Default 0 = false, nonzero = true
const char* trt_engine_decryption_lib_path{nullptr}; // specify engine decryption library path
int trt_force_sequential_engine_build{0}; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
int trt_context_memory_sharing_enable{0}; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
int trt_layer_norm_fp32_fallback{0}; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
int trt_timing_cache_enable{0}; // enable TensorRT timing cache. Default 0 = false, nonzero = true
const char* trt_timing_cache_path{nullptr}; // specify timing cache path, if none is provided the trt_engine_cache_path is used
int trt_force_timing_cache{0}; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
int trt_detailed_build_log{0}; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
int trt_build_heuristics_enable{0}; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true

View file

@ -824,6 +824,14 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) {
cache_path_ = info.engine_cache_path;
}
// use a more global cache if given
if (timing_cache_enable_) {
if (!info.timing_cache_path.empty()) {
global_cache_path_ = info.timing_cache_path;
} else {
global_cache_path_ = cache_path_;
}
}
engine_decryption_enable_ = info.engine_decryption_enable;
if (engine_decryption_enable_) {
engine_decryption_lib_path_ = info.engine_decryption_lib_path;
@ -928,6 +936,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path";
}
}
if (timing_cache_enable_) {
std::string timing_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCachePath);
// use a more global cache if given
if (!timing_cache_path.empty()) {
global_cache_path_ = timing_cache_path;
} else {
global_cache_path_ = cache_path_;
}
}
const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
if (!engine_decryption_enable_env.empty()) {
@ -1019,6 +1036,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
throw std::runtime_error("Failed to create directory " + cache_path_);
}
}
if (!global_cache_path_.empty() && !fs::is_directory(global_cache_path_)) {
if (!fs::create_directory(global_cache_path_)) {
throw std::runtime_error("Failed to create directory " + global_cache_path_);
}
}
{
auto lock = GetApiLock();
runtime_ = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger()));
@ -1104,6 +1126,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
<< ", trt_dump_subgraphs: " << dump_subgraphs_
<< ", trt_engine_cache_enable: " << engine_cache_enable_
<< ", trt_cache_path: " << cache_path_
<< ", trt_global_cache_path: " << global_cache_path_
<< ", trt_engine_decryption_enable: " << engine_decryption_enable_
<< ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
<< ", trt_force_sequential_engine_build: " << force_sequential_engine_build_
@ -2199,7 +2222,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
std::string timing_cache_path = "";
bool engine_update = false;
if (timing_cache_enable_) {
timing_cache_path = GetTimingCachePath(cache_path_, prop);
timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
}
{
// ifstream file check, engine serialization/deserialization and engine build are in critical section. It needs lock protection to prevent race condition when inferencing with multithreading.
@ -2398,7 +2421,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
runtime_.get(), profiles_[context->node_name], context_memory_sharing_enable_, &max_ctx_mem_size_,
dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_,
force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
global_cache_path_, force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
builder_optimization_level_, auxiliary_streams_, !tactic_sources_.empty(), tactics};
*state = p.release();
return 0;
@ -2460,7 +2483,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
const std::string profile_cache_path = cache_path + "_sm" + compute_capability + ".profile";
std::string timing_cache_path = "";
if (timing_cache_enable_) {
timing_cache_path = GetTimingCachePath(cache_path_, prop);
timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
}
// Load serialized engine

View file

@ -26,6 +26,8 @@ static const std::string kDLACore = "ORT_TENSORRT_DLA_CORE";
static const std::string kDumpSubgraphs = "ORT_TENSORRT_DUMP_SUBGRAPHS";
static const std::string kEngineCacheEnable = "ORT_TENSORRT_ENGINE_CACHE_ENABLE";
static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH";
// As a timing cache can be used across multiple ONNX files it makes sense to have a separate cache path
static const std::string kTimingCachePath = "ORT_TENSORRT_GLOBAL_CACHE_PATH";
static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE";
static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH";
static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
@ -131,6 +133,7 @@ struct TensorrtFuncState {
int (*engine_decryption)(const char*, char*, size_t*) = nullptr;
int (*engine_encryption)(const char*, char*, size_t) = nullptr;
bool timing_cache_enable = true;
std::string timing_cache_path;
bool force_timing_cache = false;
bool detailed_build_log = false;
bool build_heuristics_enable = false;
@ -218,7 +221,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
int builder_optimization_level_ = 3;
int auxiliary_streams_ = -1;
std::string tactic_sources_;
std::string cache_path_, engine_decryption_lib_path_;
std::string global_cache_path_, cache_path_, engine_decryption_lib_path_;
std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
OrtMutex tensorrt_mu_;
int device_id_;

View file

@ -25,7 +25,7 @@ constexpr const char* kDLAEnable = "trt_dla_enable";
constexpr const char* kDLACore = "trt_dla_core";
constexpr const char* kDumpSubgraphs = "trt_dump_subgraphs";
constexpr const char* kEngineCacheEnable = "trt_engine_cache_enable";
constexpr const char* kCachePath = "trt_engine_cache_path";
constexpr const char* kEngineCachePath = "trt_engine_cache_path";
constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable";
constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path";
constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build";
@ -33,7 +33,8 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine
constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable";
constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback";
constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable";
constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match";
constexpr const char* kTimingCachePath = "trt_timing_cache_path";
constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache";
constexpr const char* kDetailedBuildLog = "trt_detailed_build_log";
constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable";
constexpr const char* kSparsityEnable = "trt_sparsity_enable";
@ -76,13 +77,14 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
.AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core)
.AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
.AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable)
.AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path)
.AddAssignmentToReference(tensorrt::provider_option_names::kEngineCachePath, info.engine_cache_path)
.AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable)
.AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path)
.AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build)
.AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable)
.AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback)
.AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable)
.AddAssignmentToReference(tensorrt::provider_option_names::kTimingCachePath, info.timing_cache_path)
.AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache)
.AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
.AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable)
@ -115,7 +117,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
{tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.dla_core)},
{tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
{tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.engine_cache_enable)},
{tensorrt::provider_option_names::kCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
{tensorrt::provider_option_names::kEngineCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
{tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)},
{tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)},
{tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)},
@ -123,6 +125,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
{tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)},
{tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)},
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)},
{tensorrt::provider_option_names::kTimingCachePath, MakeStringWithClassicLocale(info.timing_cache_path)},
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)},
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)},
@ -142,7 +145,8 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) {
auto empty_if_null = [](const char* s) { return s != nullptr ? std::string{s} : std::string{}; };
const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name);
const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path);
const std::string kEngineCachePath_ = empty_if_null(info.trt_engine_cache_path);
const std::string kTimingCachePath_ = empty_if_null(info.trt_timing_cache_path);
const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources);
const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path);
const std::string kExtraPluginLibPaths_ = empty_if_null(info.trt_extra_plugin_lib_paths);
@ -164,13 +168,14 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
{tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.trt_dla_core)},
{tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.trt_dump_subgraphs)},
{tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.trt_engine_cache_enable)},
{tensorrt::provider_option_names::kCachePath, kCachePath_},
{tensorrt::provider_option_names::kEngineCachePath, kEngineCachePath_},
{tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)},
{tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_},
{tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)},
{tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)},
{tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)},
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)},
{tensorrt::provider_option_names::kTimingCachePath, kTimingCachePath_},
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)},
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)},
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)},
@ -204,6 +209,27 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
if (provider_options == nullptr) {
return;
}
// Converts one std::string option into the `const char*` form assigned to the
// corresponding OrtTensorRTProviderOptionsV2 field.
//   - string_copy == false: returns s_in.c_str(), a pointer into the caller's string (no allocation).
//   - string_copy == true and s_in is empty: returns nullptr.
//   - string_copy == true otherwise: returns a NUL-terminated copy allocated with new[],
//     which therefore has to be released with delete[].
auto copy_string_if_needed = [&](std::string& s_in) {
  // Borrowing mode: hand back the string's own buffer.
  if (!string_copy) {
    return s_in.c_str();
  }
  const auto length = s_in.size();
  // An empty option is represented as a null pointer rather than an empty buffer.
  if (length == 0) {
    return (const char*)nullptr;
  }
  // One extra byte for the terminator written explicitly below.
  char* buffer = new char[length + 1];
#ifdef _MSC_VER
  strncpy_s(buffer, length + 1, s_in.c_str(), length);
#else
  strncpy(buffer, s_in.c_str(), length);
#endif
  buffer[length] = '\0';
  return (const char*)buffer;
};
TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options);
auto& trt_provider_options_v2 = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options);
trt_provider_options_v2.device_id = internal_options.device_id;
@ -220,24 +246,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable;
trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable;
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.int8_calibration_table_name.size();
if (str_size == 0) {
trt_provider_options_v2.trt_int8_calibration_table_name = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size);
#else
strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str();
}
trt_provider_options_v2.trt_int8_calibration_table_name = copy_string_if_needed(internal_options.int8_calibration_table_name);
trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable;
@ -245,45 +254,12 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs;
trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable;
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.engine_cache_path.size();
if (str_size == 0) {
trt_provider_options_v2.trt_engine_cache_path = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size);
#else
strncpy(dest, internal_options.engine_cache_path.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_engine_cache_path = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str();
}
trt_provider_options_v2.trt_engine_cache_path = copy_string_if_needed(internal_options.engine_cache_path);
trt_provider_options_v2.trt_timing_cache_path = copy_string_if_needed(internal_options.timing_cache_path);
trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable;
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.engine_decryption_lib_path.size();
if (str_size == 0) {
trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size);
#else
strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str();
}
trt_provider_options_v2.trt_engine_decryption_lib_path = copy_string_if_needed(internal_options.engine_decryption_lib_path);
trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build;
trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable;
@ -296,100 +272,11 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level;
trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams;
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.tactic_sources.size();
if (str_size == 0) {
trt_provider_options_v2.trt_tactic_sources = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size);
#else
strncpy(dest, internal_options.tactic_sources.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_tactic_sources = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str();
}
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.extra_plugin_lib_paths.size();
if (str_size == 0) {
trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size);
#else
strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str();
}
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.profile_min_shapes.size();
if (str_size == 0) {
trt_provider_options_v2.trt_profile_min_shapes = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size);
#else
strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str();
}
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.profile_max_shapes.size();
if (str_size == 0) {
trt_provider_options_v2.trt_profile_max_shapes = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size);
#else
strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str();
}
if (string_copy) {
char* dest = nullptr;
auto str_size = internal_options.profile_opt_shapes.size();
if (str_size == 0) {
trt_provider_options_v2.trt_profile_opt_shapes = nullptr;
} else {
dest = new char[str_size + 1];
#ifdef _MSC_VER
strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size);
#else
strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size);
#endif
dest[str_size] = '\0';
trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest;
}
} else {
trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str();
}
trt_provider_options_v2.trt_tactic_sources = copy_string_if_needed(internal_options.tactic_sources);
trt_provider_options_v2.trt_extra_plugin_lib_paths = copy_string_if_needed(internal_options.extra_plugin_lib_paths);
trt_provider_options_v2.trt_profile_min_shapes = copy_string_if_needed(internal_options.profile_min_shapes);
trt_provider_options_v2.trt_profile_max_shapes = copy_string_if_needed(internal_options.profile_max_shapes);
trt_provider_options_v2.trt_profile_opt_shapes = copy_string_if_needed(internal_options.profile_opt_shapes);
trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable;
}

View file

@ -38,6 +38,7 @@ struct TensorrtExecutionProviderInfo {
bool context_memory_sharing_enable{false};
bool layer_norm_fp32_fallback{false};
bool timing_cache_enable{false};
std::string timing_cache_path{""};
bool force_timing_cache{false};
bool detailed_build_log{false};
bool build_heuristics_enable{false};

View file

@ -103,6 +103,7 @@ struct Tensorrt_Provider : Provider {
info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0;
info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0;
info.timing_cache_enable = options.trt_timing_cache_enable != 0;
info.timing_cache_path = options.trt_timing_cache_path == nullptr ? "" : options.trt_timing_cache_path;
info.force_timing_cache = options.trt_force_timing_cache != 0;
info.detailed_build_log = options.trt_detailed_build_log != 0;
info.build_heuristics_enable = options.trt_build_heuristics_enable != 0;

View file

@ -1931,6 +1931,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
if (ptr != nullptr) {
delete[] ptr->trt_int8_calibration_table_name;
delete[] ptr->trt_engine_cache_path;
delete[] ptr->trt_timing_cache_path;
delete[] ptr->trt_engine_decryption_lib_path;
delete[] ptr->trt_tactic_sources;
delete[] ptr->trt_extra_plugin_lib_paths;

View file

@ -479,7 +479,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
// So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance.
// (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str(), and those std::string variables are referenced by the OrtTensorRTProviderOptionsV2 instance
// and the TRT EP instance, so they won't be released.)
std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
std::string calibration_table, cache_path, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
OrtTensorRTProviderOptionsV2 params;
@ -623,6 +623,13 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be 'True' or 'False'. Default value is 'False'.\n");
}
} else if (option.first == "trt_timing_cache_path") {
if (!option.second.empty()) {
timing_cache_path = option.second;
params.trt_timing_cache_path = timing_cache_path.c_str();
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_path' should be a path string i.e. 'cache_folder/'.\n");
}
} else if (option.first == "trt_force_timing_cache") {
if (option.second == "True" || option.second == "true") {
params.trt_force_timing_cache = true;

View file

@ -6,6 +6,7 @@
#include <algorithm>
#include <limits>
#include <set>
#include <list>
#include <type_traits>
#include <core/session/onnxruntime_cxx_api.h>
#include "core/session/onnxruntime_session_options_config_keys.h"
@ -100,36 +101,28 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
const auto& api = Ort::GetApi();
OrtCUDAProviderOptionsV2* cuda_options;
Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
const char* cudnn_conv_algo_search = "cudnn_conv_algo_search";
const char* default_conv = "DEFAULT";
const char* benchmarking = "EXHAUSTIVE";
const char* heuristic = "HEURISTIC";
std::vector<const char*> option_keys, option_values;
// used to keep all option keys and value strings alive
std::list<std::string> buffer;
buffer.emplace_back("cudnn_conv_algo_search");
option_keys.push_back(buffer.back().c_str());
switch (performance_test_config.run_config.cudnn_conv_algo) {
case 0:
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &benchmarking, 1));
buffer.emplace_back("EXHAUSTIVE");
break;
case 1:
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &heuristic, 1));
buffer.emplace_back("HEURISTIC");
break;
default:
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &default_conv, 1));
buffer.emplace_back("DEFAULT");
break;
}
option_values.push_back(buffer.back().c_str());
const char* do_copy_in_default_stream = "do_copy_in_default_stream";
if (performance_test_config.run_config.do_cuda_copy_in_separate_stream) {
const char* v = "1";
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
} else {
const char* v = "0";
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
}
buffer.emplace_back("do_copy_in_default_stream");
option_keys.push_back(buffer.back().c_str());
buffer.emplace_back(performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0");
option_values.push_back(buffer.back().c_str());
#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
@ -148,51 +141,34 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
"[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
}
auto key = token.substr(0, pos);
auto value = token.substr(pos + 1);
auto key_p = key.c_str();
auto value_p = value.c_str();
Ort::ThrowOnError(
api.UpdateCUDAProviderOptions(cuda_options, &key_p, &value_p, 1));
buffer.emplace_back(token.substr(0, pos));
option_keys.push_back(buffer.back().c_str());
buffer.emplace_back(token.substr(pos + 1));
option_values.push_back(buffer.back().c_str());
}
Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options,
option_keys.data(), option_values.data(), option_keys.size()));
if (!status.IsOK()) {
OrtAllocator* allocator;
char* options;
Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options));
ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
"\nSupported options are:\n", options);
}
session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
#else
ORT_THROW("CUDA is not supported in this build\n");
#endif
} else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
#ifdef USE_TENSORRT
int device_id = 0;
int trt_max_partition_iterations = 1000;
int trt_min_subgraph_size = 1;
size_t trt_max_workspace_size = 1 << 30;
bool trt_fp16_enable = false;
bool trt_int8_enable = false;
std::string trt_int8_calibration_table_name = "";
bool trt_int8_use_native_calibration_table = false;
bool trt_dla_enable = false;
int trt_dla_core = 0;
bool trt_dump_subgraphs = false;
bool trt_engine_cache_enable = false;
std::string trt_engine_cache_path = "";
bool trt_engine_decryption_enable = false;
std::string trt_engine_decryption_lib_path = "";
bool trt_force_sequential_engine_build = false;
bool trt_context_memory_sharing_enable = false;
bool trt_layer_norm_fp32_fallback = false;
bool trt_timing_cache_enable = false;
bool trt_force_timing_cache = false;
bool trt_detailed_build_log = false;
bool trt_build_heuristics_enable = false;
bool trt_sparsity_enable = false;
int trt_builder_optimization_level = 3;
int trt_auxiliary_streams = -1;
std::string trt_tactic_sources = "";
std::string trt_extra_plugin_lib_paths = "";
std::string trt_profile_min_shapes = "";
std::string trt_profile_max_shapes = "";
std::string trt_profile_opt_shapes = "";
bool trt_cuda_graph_enable = false;
const auto& api = Ort::GetApi();
OrtTensorRTProviderOptionsV2* tensorrt_options;
Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
std::vector<const char*> option_keys, option_values;
// used to keep all option keys and value strings alive
std::list<std::string> buffer;
#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
@ -207,272 +183,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
}
auto pos = token.find("|");
if (pos == std::string::npos || pos == 0 || pos == token.length()) {
ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
ORT_THROW(
"[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
}
auto key = token.substr(0, pos);
auto value = token.substr(pos + 1);
if (key == "device_id") {
if (!value.empty()) {
device_id = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number.\n");
}
} else if (key == "trt_max_partition_iterations") {
if (!value.empty()) {
trt_max_partition_iterations = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a number.\n");
}
} else if (key == "trt_min_subgraph_size") {
if (!value.empty()) {
trt_min_subgraph_size = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a number.\n");
}
} else if (key == "trt_max_workspace_size") {
if (!value.empty()) {
trt_max_workspace_size = std::stoull(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
}
} else if (key == "trt_fp16_enable") {
if (value == "true" || value == "True") {
trt_fp16_enable = true;
} else if (value == "false" || value == "False") {
trt_fp16_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_int8_enable") {
if (value == "true" || value == "True") {
trt_int8_enable = true;
} else if (value == "false" || value == "False") {
trt_int8_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_int8_calibration_table_name") {
if (!value.empty()) {
trt_int8_calibration_table_name = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
}
} else if (key == "trt_int8_use_native_calibration_table") {
if (value == "true" || value == "True") {
trt_int8_use_native_calibration_table = true;
} else if (value == "false" || value == "False") {
trt_int8_use_native_calibration_table = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_dla_enable") {
if (value == "true" || value == "True") {
trt_dla_enable = true;
} else if (value == "false" || value == "False") {
trt_dla_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_dla_core") {
if (!value.empty()) {
trt_dla_core = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a number.\n");
}
} else if (key == "trt_dump_subgraphs") {
if (value == "true" || value == "True") {
trt_dump_subgraphs = true;
} else if (value == "false" || value == "False") {
trt_dump_subgraphs = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_engine_cache_enable") {
if (value == "true" || value == "True") {
trt_engine_cache_enable = true;
} else if (value == "false" || value == "False") {
trt_engine_cache_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_engine_cache_path") {
if (!value.empty()) {
trt_engine_cache_path = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a non-empty string.\n");
}
} else if (key == "trt_engine_decryption_enable") {
if (value == "true" || value == "True") {
trt_engine_decryption_enable = true;
} else if (value == "false" || value == "False") {
trt_engine_decryption_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_engine_decryption_lib_path") {
if (!value.empty()) {
trt_engine_decryption_lib_path = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a non-empty string.\n");
}
} else if (key == "trt_force_sequential_engine_build") {
if (value == "true" || value == "True") {
trt_force_sequential_engine_build = true;
} else if (value == "false" || value == "False") {
trt_force_sequential_engine_build = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_context_memory_sharing_enable") {
if (value == "true" || value == "True") {
trt_context_memory_sharing_enable = true;
} else if (value == "false" || value == "False") {
trt_context_memory_sharing_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_context_memory_sharing_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_layer_norm_fp32_fallback") {
if (value == "true" || value == "True") {
trt_layer_norm_fp32_fallback = true;
} else if (value == "false" || value == "False") {
trt_layer_norm_fp32_fallback = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_layer_norm_fp32_fallback' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_timing_cache_enable") {
if (value == "true" || value == "True") {
trt_timing_cache_enable = true;
} else if (value == "false" || value == "False") {
trt_timing_cache_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_force_timing_cache") {
if (value == "true" || value == "True") {
trt_force_timing_cache = true;
} else if (value == "false" || value == "False") {
trt_force_timing_cache = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_detailed_build_log") {
if (value == "true" || value == "True") {
trt_detailed_build_log = true;
} else if (value == "false" || value == "False") {
trt_detailed_build_log = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_build_heuristics_enable") {
if (value == "true" || value == "True") {
trt_build_heuristics_enable = true;
} else if (value == "false" || value == "False") {
trt_build_heuristics_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_sparsity_enable") {
if (value == "true" || value == "True") {
trt_sparsity_enable = true;
} else if (value == "false" || value == "False") {
trt_sparsity_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "trt_builder_optimization_level") {
if (!value.empty()) {
trt_builder_optimization_level = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n");
}
} else if (key == "trt_auxiliary_streams") {
if (!value.empty()) {
trt_auxiliary_streams = std::stoi(value);
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n");
}
} else if (key == "trt_tactic_sources") {
if (!value.empty()) {
trt_tactic_sources = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-empty string.\n");
}
} else if (key == "trt_extra_plugin_lib_paths") {
if (!value.empty()) {
trt_extra_plugin_lib_paths = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a non-empty string.\n");
}
} else if (key == "trt_profile_min_shapes") {
if (!value.empty()) {
trt_profile_min_shapes = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_min_shapes' should be a non-empty string.\n");
}
} else if (key == "trt_profile_max_shapes") {
if (!value.empty()) {
trt_profile_max_shapes = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_max_shapes' should be a non-empty string.\n");
}
} else if (key == "trt_profile_opt_shapes") {
if (!value.empty()) {
trt_profile_opt_shapes = value;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_opt_shapes' should be a non-empty string.\n");
}
} else if (key == "trt_cuda_graph_enable") {
if (value == "true" || value == "True") {
trt_cuda_graph_enable = true;
} else if (value == "false" || value == "False") {
trt_cuda_graph_enable = false;
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_cuda_graph_enable' should be a boolean i.e. true or false. Default value is false.\n");
}
} else {
ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback', 'trt_timing_cache_enable', 'trt_force_timing_cache', 'trt_detailed_build_log', 'trt_build_heuristics_enable', 'trt_sparsity_enable', 'trt_builder_optimization_level', 'trt_auxiliary_streams', 'trt_tactic_sources', 'trt_extra_plugin_lib_paths', 'trt_profile_min_shapes', 'trt_profile_max_shapes', 'trt_profile_opt_shapes', 'trt_cuda_graph_enable'] \n");
}
buffer.emplace_back(token.substr(0, pos));
option_keys.push_back(buffer.back().c_str());
buffer.emplace_back(token.substr(pos + 1));
option_values.push_back(buffer.back().c_str());
}
OrtTensorRTProviderOptionsV2 tensorrt_options;
tensorrt_options.device_id = device_id;
tensorrt_options.has_user_compute_stream = 0;
tensorrt_options.user_compute_stream = nullptr;
tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations;
tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size;
tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
tensorrt_options.trt_fp16_enable = trt_fp16_enable;
tensorrt_options.trt_int8_enable = trt_int8_enable;
tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
tensorrt_options.trt_dla_enable = trt_dla_enable;
tensorrt_options.trt_dla_core = trt_dla_core;
tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs;
tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable;
tensorrt_options.trt_engine_cache_path = trt_engine_cache_path.c_str();
tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable;
tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str();
tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build;
tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable;
tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback;
tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable;
tensorrt_options.trt_force_timing_cache = trt_force_timing_cache;
tensorrt_options.trt_detailed_build_log = trt_detailed_build_log;
tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable;
tensorrt_options.trt_sparsity_enable = trt_sparsity_enable;
tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level;
tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams;
tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str();
tensorrt_options.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str();
tensorrt_options.trt_profile_min_shapes = trt_profile_min_shapes.c_str();
tensorrt_options.trt_profile_max_shapes = trt_profile_max_shapes.c_str();
tensorrt_options.trt_profile_opt_shapes = trt_profile_opt_shapes.c_str();
tensorrt_options.trt_cuda_graph_enable = trt_cuda_graph_enable;
session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options);
// Apply the key/value pairs collected in option_keys/option_values to the TensorRT provider options.
Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options,
option_keys.data(), option_values.data(), option_keys.size()));
if (!status.IsOK()) {
  // Include the provider options rendered as a string in the error so the user can see
  // what the provider reports alongside the failure reason.
  OrtAllocator* allocator = nullptr;
  char* options = nullptr;
  Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
  Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options));
  // This is the TensorRT branch, so the message must name the TensorRT options (not CUDA).
  ORT_THROW("[ERROR] [TensorRT] Configuring the TensorRT options failed with message: ", status.GetErrorMessage(),
            "\nSupported options are:\n", options);
}
session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = device_id;
cuda_options.device_id = tensorrt_options->device_id;
cuda_options.cudnn_conv_algo_search = static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo);
cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
// TODO: Support arena configuration for users of perf test

View file

@ -590,6 +590,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
// uint64_t compilation_without_cache_ms, compilation_with_cache_ms;
// First session is created with TRT EP with timing cache enabled
// Not specifying a trt_timing_cache_path will result in using the working directory
params.trt_timing_cache_enable = 1;
{
// auto start = chrono::steady_clock::now();