mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-23 02:38:28 +00:00
Exposing new TRT build options (#15089)
### Description
This adds a few TRT options; some of them are only available on TRT
8.6:
- heuristics
- sparsity
- optimization level (8.6 only)
- auxiliary stream (8.6 only)
- tactic source selection
I am not sure yet which tests I should add for these options. As those
are mostly simple TRT flags, I am not sure to what level I should test.
For heuristics something similar to
44dda08b51/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc (L510-L538)
should be possible, but for all the others we would essentially only be
testing whether setting the option causes a crash.
Also, if I forgot some option that would be good to have, feel free to
speak up!
This commit is contained in:
parent
4e1f75810c
commit
fbe88fccbd
13 changed files with 363 additions and 20 deletions
|
|
@ -34,4 +34,10 @@ struct OrtTensorRTProviderOptionsV2 {
|
|||
int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true
|
||||
int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
|
||||
int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
|
||||
int trt_build_heuristics_enable; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
|
||||
int trt_sparsity_enable; // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
|
||||
int trt_builder_optimization_level; // Set the builder optimization level. WARNING: levels below 2 do not guarantee good engine performance, but greatly improve build time. Default 2, valid range [0-4]
|
||||
int trt_auxiliary_streams; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
|
||||
const char* trt_tactic_sources; // Specify the tactics to be used by adding (+) or removing (-) tactics from the default
|
||||
// tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"|"JIT_CONVOLUTIONS"
|
||||
};
|
||||
|
|
|
|||
|
|
@ -2873,7 +2873,7 @@ struct OrtApi {
|
|||
*
|
||||
* For example, "trt_max_workspace_size=2147483648;trt_max_partition_iterations=10;trt_int8_enable=1;......"
|
||||
*
|
||||
* \param tensorrt_options - OrTensorRTProviderOptionsV2 instance
|
||||
* \param tensorrt_options - OrtTensorRTProviderOptionsV2 instance
|
||||
* \param allocator - a ptr to an instance of OrtAllocator obtained with OrtApi::CreateAllocator or OrtApi::GetAllocatorWithDefaultOptions
|
||||
* the specified allocator will be used to allocate continuous buffers for output strings and lengths.
|
||||
* \param ptr - is a UTF-8 null terminated string allocated using 'allocator'. The caller is responsible for using the same allocator to free it.
|
||||
|
|
|
|||
|
|
@ -118,6 +118,67 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& network, std::unordered_map<s
|
|||
return true;
|
||||
}
|
||||
|
||||
// Splits `s` into the pieces found between occurrences of `separator`.
//
// Scanning stops as soon as the cursor reaches the end of `s`, so:
//   - an empty input yields an empty vector,
//   - a trailing separator does not produce a trailing empty piece,
//   - leading or consecutive separators do produce empty pieces.
std::vector<std::string> SplitToStringVec(std::string const& s, char separator) {
  std::vector<std::string> pieces;

  size_t cursor = 0;
  while (cursor < s.length()) {
    const size_t hit = s.find(separator, cursor);
    // No further separator: the current piece runs to the end of the input.
    const size_t pieceEnd = (hit == std::string::npos) ? s.length() : hit;
    pieces.emplace_back(s, cursor, pieceEnd - cursor);
    // Step over the separator (or past the end, which terminates the loop).
    cursor = pieceEnd + 1;
  }

  return pieces;
}
|
||||
|
||||
// Translates a comma-separated tactic source specification, e.g. "-CUDNN,+CUBLAS",
// into an nvinfer1::TacticSources bitmask.
//
// Every entry must be a '+' (enable) or '-' (disable) prefix followed by a
// case-insensitive source name: CUBLAS, CUBLASLT / CUBLAS_LT, CUDNN,
// EDGE_MASK_CONVOLUTIONS or JIT_CONVOLUTIONS.
//
// Entries that are empty, lack a +/- prefix, or name an unknown source are
// reported with a warning and ignored; they never contribute a bit to the result.
//
// Returns the bits of all '+' entries with the bits of all '-' entries removed.
// The input string is not modified.
//
// NOTE(review): the call sites visible in this file OR the returned mask into the
// builder config's current tactic sources, so a '-' entry can only cancel a '+'
// entry from the same string; it cannot clear a source that is already enabled
// in the config. Confirm whether that is the intended semantics.
nvinfer1::TacticSources GetTacticSourceFromString(std::string& tactic_sting) {
  nvinfer1::TacticSources disabledTactics = 0;
  nvinfer1::TacticSources enabledTactics = 0;
  const std::vector<std::string> tacticList = SplitToStringVec(tactic_sting, ',');
  for (const auto& entry : tacticList) {
    // Consecutive or leading separators produce empty entries; calling front()
    // on them would be undefined behaviour.
    if (entry.empty()) {
      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Empty tactic source entry is ignored";
      continue;
    }

    const char prefix = entry.front();
    if (prefix != '+' && prefix != '-') {
      // Really skip the entry: without a prefix it is unclear whether the first
      // character belongs to the name, so do not guess.
      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic source must be prefixed with + or - skipping: " << entry;
      continue;
    }
    const bool enable = (prefix == '+');

    // Upper-case the name so matching is case-insensitive. std::toupper requires
    // a value representable as unsigned char, hence the lambda parameter type.
    std::string name = entry.substr(1);
    std::transform(name.begin(), name.end(), name.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    nvinfer1::TacticSource source{};
    if (name == "CUBLAS") {
      source = nvinfer1::TacticSource::kCUBLAS;
    } else if (name == "CUBLASLT" || name == "CUBLAS_LT") {
      source = nvinfer1::TacticSource::kCUBLAS_LT;
    } else if (name == "CUDNN") {
      source = nvinfer1::TacticSource::kCUDNN;
    } else if (name == "EDGE_MASK_CONVOLUTIONS") {
      source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS;
    } else if (name == "JIT_CONVOLUTIONS") {
      source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS;
    } else {
      // Do not fall through: the value-initialized `source` would otherwise be
      // interpreted as a real tactic source and toggle its bit.
      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic source was not found with name: " << name;
      continue;
    }

    const uint32_t sourceBit = 1U << static_cast<uint32_t>(source);
    if (enable) {
      enabledTactics |= sourceBit;
    } else {
      disabledTactics |= sourceBit;
    }
  }
  return enabledTactics & ~disabledTactics;
}
|
||||
|
||||
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
|
||||
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
|
||||
if (!iFile) {
|
||||
|
|
@ -353,6 +414,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
if (fp16_enable_) {
|
||||
layer_norm_fp32_fallback_ = info.layer_norm_fp32_fallback;
|
||||
}
|
||||
build_heuristics_enable_ = info.build_heuristics_enable;
|
||||
sparsity_enable_ = info.sparsity_enable;
|
||||
builder_optimization_level_ = info.builder_optimization_level;
|
||||
auxiliary_streams_ = info.auxiliary_streams;
|
||||
tactic_sources_ = info.tactic_sources;
|
||||
} else {
|
||||
const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations);
|
||||
if (!max_partition_iterations_env.empty()) {
|
||||
|
|
@ -462,6 +528,31 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
if (!layer_norm_fp32_fallback_env.empty()) {
|
||||
layer_norm_fp32_fallback_ = (std::stoi(layer_norm_fp32_fallback_env) == 0 ? false : true);
|
||||
}
|
||||
|
||||
const std::string build_heuristics_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kBuildHeuristics);
|
||||
if (!build_heuristics_env.empty()) {
|
||||
build_heuristics_enable_ = (std::stoi(build_heuristics_env) == 0 ? false : true);
|
||||
}
|
||||
|
||||
const std::string sparsity_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kSparsityEnable);
|
||||
if (!sparsity_enable_env.empty()) {
|
||||
sparsity_enable_ = (std::stoi(sparsity_enable_env) == 0 ? false : true);
|
||||
}
|
||||
|
||||
const std::string builder_optimization_level_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kBuilderOptimizationLevel);
|
||||
if (!builder_optimization_level_env.empty()) {
|
||||
builder_optimization_level_ = std::stoi(builder_optimization_level_env);
|
||||
}
|
||||
|
||||
const std::string auxiliary_streams_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kAuxiliaryStreams);
|
||||
if (!auxiliary_streams_env.empty()) {
|
||||
auxiliary_streams_ = std::stoi(auxiliary_streams_env);
|
||||
}
|
||||
|
||||
const std::string tactic_sources_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTacticSources);
|
||||
if (!tactic_sources_env.empty()) {
|
||||
tactic_sources_ = tactic_sources_env;
|
||||
}
|
||||
}
|
||||
|
||||
// Validate setting
|
||||
|
|
@ -527,7 +618,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
|
|||
<< ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
|
||||
<< ", trt_force_sequential_engine_build: " << force_sequential_engine_build_
|
||||
<< ", trt_context_memory_sharing_enable: " << context_memory_sharing_enable_
|
||||
<< ", trt_layer_norm_fp32_fallback: " << layer_norm_fp32_fallback_;
|
||||
<< ", trt_layer_norm_fp32_fallback: " << layer_norm_fp32_fallback_
|
||||
<< ", trt_build_heuristics_enable: " << build_heuristics_enable_
|
||||
<< ", trt_sparsity_enable: " << sparsity_enable_
|
||||
<< ", trt_builder_optimization_level: " << builder_optimization_level_
|
||||
<< ", trt_auxiliary_streams: " << auxiliary_streams_
|
||||
<< ", trt_tactic_sources: " << tactic_sources_;
|
||||
}
|
||||
|
||||
TensorrtExecutionProvider::~TensorrtExecutionProvider() {
|
||||
|
|
@ -1410,6 +1506,45 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
}
|
||||
}
|
||||
|
||||
// enable sparse weights
|
||||
if (sparsity_enable_) {
|
||||
trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed";
|
||||
}
|
||||
|
||||
// enable builder heuristics
|
||||
if (build_heuristics_enable_) {
|
||||
trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC );
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled";
|
||||
}
|
||||
#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8
|
||||
// switch optimizaion level
|
||||
if (builder_optimization_level_ != 2) {
|
||||
trt_config->setBuilderOptimizationLevel(builder_optimization_level_);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder optimization level is set to " << builder_optimization_level_;
|
||||
}
|
||||
|
||||
// limit auxiliary streams
|
||||
if (auxiliary_streams_ >= 0) {
|
||||
trt_config->setMaxAuxStreams(auxiliary_streams_);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Auxiliary streams are se to " << auxiliary_streams_;
|
||||
}
|
||||
#else
|
||||
if (builder_optimization_level_ != 2) {
|
||||
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder optimization level can only be used on TRT 8.6 onwards!";
|
||||
}
|
||||
if (auxiliary_streams_ >= 0) {
|
||||
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Auxiliary streams can only be set on TRT 8.6 onwards!";
|
||||
}
|
||||
#endif
|
||||
// limit used tactic sources
|
||||
if (!tactic_sources_.empty()) {
|
||||
nvinfer1::TacticSources tactics = trt_config->getTacticSources();
|
||||
tactics |= GetTacticSourceFromString(tactic_sources_);
|
||||
trt_config->setTacticSources(tactics);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Tactic sources are limited using " << tactic_sources_;
|
||||
}
|
||||
|
||||
// Build TRT engine here if the graph doesn't have dynamic shape input. Otherwise engine will
|
||||
// be built at runtime
|
||||
std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
|
||||
|
|
@ -1584,6 +1719,11 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
NodeComputeInfo compute_info;
|
||||
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
|
||||
std::unique_ptr<TensorrtFuncState> p = std::make_unique<TensorrtFuncState>();
|
||||
// translate tactic sources string to nvinfer1::TacticSources
|
||||
nvinfer1::TacticSources tactics = 0;
|
||||
if (!tactic_sources_.empty()) {
|
||||
tactics = GetTacticSourceFromString(tactic_sources_);
|
||||
}
|
||||
*p = {context->allocate_func, context->release_func, context->allocator_handle, &parsers_[context->node_name],
|
||||
&engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name],
|
||||
&networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
|
||||
|
|
@ -1591,7 +1731,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
|
||||
runtime_.get(), nullptr, allocator_, context_memory_sharing_enable_, &max_ctx_mem_size_, &context_memory_,
|
||||
dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_,
|
||||
force_timing_cache_match_, detailed_build_log_};
|
||||
force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
|
||||
builder_optimization_level_, auxiliary_streams_ , !tactic_sources_.empty(), tactics};
|
||||
*state = p.release();
|
||||
return 0;
|
||||
};
|
||||
|
|
@ -1872,6 +2013,45 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
trt_config->setDLACore(trt_state->dla_core);
|
||||
}
|
||||
|
||||
// enable sparse weights
|
||||
if (trt_state->sparsity_enable) {
|
||||
trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed";
|
||||
}
|
||||
|
||||
// enable builder heuristics
|
||||
if (trt_state->build_heuristics_enable) {
|
||||
trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC );
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled";
|
||||
}
|
||||
#if NV_TENSORRT_MINOR > 5 && NV_TENSORRT_MAJOR >= 8
|
||||
// switch optimizaion level
|
||||
if (trt_state->builder_optimization_level != 2) {
|
||||
trt_config->setBuilderOptimizationLevel(trt_state->builder_optimization_level);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder optimization level is set to " << builder_optimization_level_;
|
||||
}
|
||||
|
||||
// limit auxiliary streams
|
||||
if (trt_state->auxiliary_streams >= 0) {
|
||||
trt_config->setMaxAuxStreams(trt_state->auxiliary_streams);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Auxiliary streams are se to " << trt_state->auxiliary_streams;
|
||||
}
|
||||
#else
|
||||
if (trt_state->builder_optimization_level != 2) {
|
||||
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Builder optimization level can only be used on TRT 8.6 onwards!";
|
||||
}
|
||||
if (trt_state->auxiliary_streams >= 0) {
|
||||
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Auxiliary streams can only be set on TRT 8.6 onwards!";
|
||||
}
|
||||
#endif
|
||||
// limit used tactic sources
|
||||
if (trt_state->filter_tactic_sources) {
|
||||
nvinfer1::TacticSources tactics = trt_config->getTacticSources();
|
||||
tactics |= trt_state->tactic_sources;
|
||||
trt_config->setTacticSources(tactics);
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Tactic sources are limited using bitmask " << tactics;
|
||||
}
|
||||
|
||||
// Load timing cache from file. Create a fresh cache if the file doesn't exist
|
||||
std::unique_ptr<nvinfer1::ITimingCache> timing_cache = nullptr;
|
||||
if (trt_state->timing_cache_enable) {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,11 @@ static const std::string kLayerNormFP32Fallback = "ORT_TENSORRT_LAYER_NORM_FP32_
|
|||
static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE";
|
||||
static const std::string kForceTimingCache = "ORT_TENSORRT_FORCE_TIMING_CACHE_ENABLE";
|
||||
static const std::string kDetailedBuildLog = "ORT_TENSORRT_DETAILED_BUILD_LOG_ENABLE";
|
||||
static const std::string kBuildHeuristics = "ORT_TENSORRT_BUILD_HEURISTICS_ENABLE";
|
||||
static const std::string kSparsityEnable = "ORT_TENSORRT_SPARSITY_ENABLE";
|
||||
static const std::string kBuilderOptimizationLevel = "ORT_TENSORRT_BUILDER_OPTIMIZATION_LEVEL";
|
||||
static const std::string kAuxiliaryStreams = "ORT_TENSORRT_AUXILIARY_STREAMS";
|
||||
static const std::string kTacticSources = "ORT_TENSORRT_TACTIC_SOURCES";
|
||||
// Old env variable for backward compatibility
|
||||
static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH";
|
||||
} // namespace tensorrt_env_vars
|
||||
|
|
@ -120,6 +125,12 @@ struct TensorrtFuncState {
|
|||
bool timing_cache_enable = true;
|
||||
bool force_timing_cache = false;
|
||||
bool detailed_build_log = false;
|
||||
bool build_heuristics_enable = false;
|
||||
bool sparsity_enable = false;
|
||||
int builder_optimization_level = 2;
|
||||
int auxiliary_streams = -1;
|
||||
bool filter_tactic_sources = false;
|
||||
nvinfer1::TacticSources tactic_sources;
|
||||
};
|
||||
|
||||
// Logical device representation.
|
||||
|
|
@ -169,6 +180,11 @@ class TensorrtExecutionProvider : public IExecutionProvider {
|
|||
bool int8_use_native_tensorrt_calibration_table_ = false;
|
||||
bool dump_subgraphs_ = false;
|
||||
bool engine_cache_enable_ = false;
|
||||
bool build_heuristics_enable_ = false;
|
||||
bool sparsity_enable_ = false;
|
||||
int builder_optimization_level_ = 2;
|
||||
int auxiliary_streams_ = -1;
|
||||
std::string tactic_sources_;
|
||||
std::string cache_path_, engine_decryption_lib_path_;
|
||||
std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
|
||||
OrtMutex tensorrt_mu_;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/tensorrt/tensorrt_execution_provider_info.h"
|
||||
#include "core/providers/tensorrt/tensorrt_provider_options.h"
|
||||
|
||||
#include "core/common/make_string.h"
|
||||
#include "core/common/parse_string.h"
|
||||
|
|
@ -33,6 +34,11 @@ constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback";
|
|||
constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable";
|
||||
constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match";
|
||||
constexpr const char* kDetailedBuildLog = "trt_detailed_build_log";
|
||||
constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable";
|
||||
constexpr const char* kSparsityEnable = "trt_sparsity_enable";
|
||||
constexpr const char* kBuilderOptimizationLevel = "trt_builder_optimization_level";
|
||||
constexpr const char* kAuxiliaryStreams = "trt_auxiliary_streams";
|
||||
constexpr const char* kTacticSources = "trt_tactic_sources";
|
||||
} // namespace provider_option_names
|
||||
} // namespace tensorrt
|
||||
|
||||
|
|
@ -72,6 +78,11 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
|
|||
.AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kSparsityEnable, info.sparsity_enable)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kBuilderOptimizationLevel, info.builder_optimization_level)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kAuxiliaryStreams, info.auxiliary_streams)
|
||||
.AddAssignmentToReference(tensorrt::provider_option_names::kTacticSources, info.tactic_sources)
|
||||
.Parse(options)); // add new provider option here.
|
||||
|
||||
return info;
|
||||
|
|
@ -102,15 +113,21 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
|
|||
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)},
|
||||
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)},
|
||||
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
|
||||
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)},
|
||||
{tensorrt::provider_option_names::kSparsityEnable, MakeStringWithClassicLocale(info.sparsity_enable)},
|
||||
{tensorrt::provider_option_names::kBuilderOptimizationLevel, MakeStringWithClassicLocale(info.builder_optimization_level)},
|
||||
{tensorrt::provider_option_names::kAuxiliaryStreams, MakeStringWithClassicLocale(info.auxiliary_streams)},
|
||||
{tensorrt::provider_option_names::kTacticSources, MakeStringWithClassicLocale(info.tactic_sources)},
|
||||
};
|
||||
return options;
|
||||
}
|
||||
|
||||
ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptions& info) {
|
||||
ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) {
|
||||
|
||||
auto empty_if_null = [](const char* s) { return s != nullptr ? std::string{s} : std::string{}; };
|
||||
const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name);
|
||||
const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path);
|
||||
const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources);
|
||||
const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path);
|
||||
|
||||
const ProviderOptions options{
|
||||
|
|
@ -130,6 +147,16 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
|
|||
{tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)},
|
||||
{tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_},
|
||||
{tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)},
|
||||
{tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)},
|
||||
{tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)},
|
||||
{tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)},
|
||||
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)},
|
||||
{tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)},
|
||||
{tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)},
|
||||
{tensorrt::provider_option_names::kSparsityEnable, MakeStringWithClassicLocale(info.trt_sparsity_enable)},
|
||||
{tensorrt::provider_option_names::kBuilderOptimizationLevel, MakeStringWithClassicLocale(info.trt_builder_optimization_level)},
|
||||
{tensorrt::provider_option_names::kAuxiliaryStreams, MakeStringWithClassicLocale(info.trt_auxiliary_streams)},
|
||||
{tensorrt::provider_option_names::kTacticSources, kTacticSources_},
|
||||
};
|
||||
return options;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,9 +36,14 @@ struct TensorrtExecutionProviderInfo {
|
|||
bool timing_cache_enable{false};
|
||||
bool force_timing_cache{false};
|
||||
bool detailed_build_log{false};
|
||||
bool build_heuristics_enable{false};
|
||||
bool sparsity_enable{false};
|
||||
int builder_optimization_level{2};
|
||||
int auxiliary_streams{-1};
|
||||
std::string tactic_sources{""};
|
||||
|
||||
static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
|
||||
static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info);
|
||||
static ProviderOptions ToProviderOptions(const OrtTensorRTProviderOptions& info);
|
||||
static ProviderOptions ToProviderOptions(const OrtTensorRTProviderOptionsV2& info);
|
||||
};
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -73,6 +73,11 @@ struct Tensorrt_Provider : Provider {
|
|||
info.timing_cache_enable = options.trt_timing_cache_enable != 0;
|
||||
info.force_timing_cache = options.trt_force_timing_cache != 0;
|
||||
info.detailed_build_log = options.trt_detailed_build_log != 0;
|
||||
info.build_heuristics_enable = options.trt_build_heuristics_enable != 0;
|
||||
info.sparsity_enable = options.trt_sparsity_enable;
|
||||
info.builder_optimization_level = options.trt_builder_optimization_level;
|
||||
info.auxiliary_streams = options.trt_auxiliary_streams;
|
||||
info.tactic_sources = options.trt_tactic_sources == nullptr ? "" : options.trt_tactic_sources;
|
||||
return std::make_shared<TensorrtProviderFactory>(info);
|
||||
}
|
||||
|
||||
|
|
@ -143,10 +148,28 @@ struct Tensorrt_Provider : Provider {
|
|||
trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable;
|
||||
trt_options.trt_force_timing_cache = internal_options.force_timing_cache;
|
||||
trt_options.trt_detailed_build_log = internal_options.detailed_build_log;
|
||||
trt_options.trt_build_heuristics_enable = internal_options.build_heuristics_enable;
|
||||
trt_options.trt_sparsity_enable = internal_options.sparsity_enable;
|
||||
trt_options.trt_builder_optimization_level = internal_options.builder_optimization_level;
|
||||
trt_options.trt_auxiliary_streams = internal_options.auxiliary_streams;
|
||||
str_size = internal_options.tactic_sources.size();
|
||||
if (str_size == 0) {
|
||||
trt_options.trt_tactic_sources = nullptr;
|
||||
} else {
|
||||
dest = new char[str_size + 1];
|
||||
#ifdef _MSC_VER
|
||||
strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size);
|
||||
#else
|
||||
strncpy(dest, internal_options.tactic_sources.c_str(), str_size);
|
||||
#endif
|
||||
dest[str_size] = '\0';
|
||||
trt_options.trt_tactic_sources = (const char*)dest;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ProviderOptions GetProviderOptions(const void* provider_options) override {
|
||||
auto& options = *reinterpret_cast<const OrtTensorRTProviderOptions*>(provider_options);
|
||||
auto& options = *reinterpret_cast<const OrtTensorRTProviderOptionsV2*>(provider_options);
|
||||
return onnxruntime::TensorrtExecutionProviderInfo::ToProviderOptions(options);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1280,6 +1280,11 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti
|
|||
trt_options_converted.trt_detailed_build_log = 0;
|
||||
trt_options_converted.trt_context_memory_sharing_enable = 0;
|
||||
trt_options_converted.trt_layer_norm_fp32_fallback = 0;
|
||||
trt_options_converted.trt_build_heuristics_enable = 0;
|
||||
trt_options_converted.trt_sparsity_enable = 0;
|
||||
trt_options_converted.trt_builder_optimization_level = 2;
|
||||
trt_options_converted.trt_auxiliary_streams = -1;
|
||||
trt_options_converted.trt_tactic_sources = "";
|
||||
return trt_options_converted;
|
||||
}
|
||||
|
||||
|
|
@ -1413,11 +1418,11 @@ INcclService& INcclService::GetInstance() {
|
|||
} // namespace rocm
|
||||
#endif
|
||||
|
||||
void UpdateProviderInfo_Tensorrt(OrtTensorRTProviderOptions* provider_options, const ProviderOptions& options) {
|
||||
void UpdateProviderInfo_Tensorrt(OrtTensorRTProviderOptionsV2* provider_options, const ProviderOptions& options) {
|
||||
s_library_tensorrt.Get().UpdateProviderOptions(reinterpret_cast<void*>(provider_options), options);
|
||||
}
|
||||
|
||||
ProviderOptions GetProviderInfo_Tensorrt(const OrtTensorRTProviderOptions* provider_options) {
|
||||
ProviderOptions GetProviderInfo_Tensorrt(const OrtTensorRTProviderOptionsV2* provider_options) {
|
||||
return s_library_tensorrt.Get().GetProviderOptions(reinterpret_cast<const void*>(provider_options));
|
||||
}
|
||||
|
||||
|
|
@ -1632,7 +1637,7 @@ ORT_API_STATUS_IMPL(OrtApis::UpdateTensorRTProviderOptions,
|
|||
provider_options_map[provider_options_keys[i]] = provider_options_values[i];
|
||||
}
|
||||
|
||||
onnxruntime::UpdateProviderInfo_Tensorrt(reinterpret_cast<OrtTensorRTProviderOptions*>(tensorrt_options),
|
||||
onnxruntime::UpdateProviderInfo_Tensorrt(tensorrt_options,
|
||||
reinterpret_cast<const onnxruntime::ProviderOptions&>(provider_options_map));
|
||||
return nullptr;
|
||||
#else
|
||||
|
|
@ -1649,7 +1654,7 @@ ORT_API_STATUS_IMPL(OrtApis::GetTensorRTProviderOptionsAsString, _In_ const OrtT
|
|||
_Outptr_ char** ptr) {
|
||||
API_IMPL_BEGIN
|
||||
#ifdef USE_TENSORRT
|
||||
onnxruntime::ProviderOptions options = onnxruntime::GetProviderInfo_Tensorrt(reinterpret_cast<const OrtTensorRTProviderOptions*>(tensorrt_options));
|
||||
onnxruntime::ProviderOptions options = onnxruntime::GetProviderInfo_Tensorrt(tensorrt_options);
|
||||
onnxruntime::ProviderOptions::iterator it = options.begin();
|
||||
std::string options_str = "";
|
||||
|
||||
|
|
|
|||
|
|
@ -371,7 +371,12 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0};
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
-1,
|
||||
nullptr};
|
||||
for (auto option : it->second) {
|
||||
if (option.first == "device_id") {
|
||||
if (!option.second.empty()) {
|
||||
|
|
|
|||
|
|
@ -124,6 +124,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
bool trt_timing_cache_enable = false;
|
||||
bool trt_force_timing_cache = false;
|
||||
bool trt_detailed_build_log = false;
|
||||
bool trt_build_heuristics_enable = false;
|
||||
bool trt_sparsity_enable = false;
|
||||
int trt_builder_optimization_level = 2;
|
||||
int trt_auxiliary_streams = -1;
|
||||
std::string trt_tactic_sources = "";
|
||||
|
||||
#ifdef _MSC_VER
|
||||
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
|
||||
|
|
@ -295,6 +300,40 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_build_heuristics_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_build_heuristics_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_build_heuristics_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_sparsity_enable") {
|
||||
if (value == "true" || value == "True") {
|
||||
trt_sparsity_enable = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
trt_sparsity_enable = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "trt_builder_optimization_level") {
|
||||
if (!value.empty()) {
|
||||
trt_builder_optimization_level = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n");
|
||||
}
|
||||
} else if (key == "trt_auxiliary_streams") {
|
||||
if (!value.empty()) {
|
||||
trt_auxiliary_streams = std::stoi(value);
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n");
|
||||
}
|
||||
} else if (key == "trt_tactic_sources") {
|
||||
if (!value.empty()) {
|
||||
trt_tactic_sources = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-emtpy string.\n");
|
||||
}
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback'] \n");
|
||||
}
|
||||
|
|
@ -323,6 +362,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable;
|
||||
tensorrt_options.trt_force_timing_cache = trt_force_timing_cache;
|
||||
tensorrt_options.trt_detailed_build_log = trt_detailed_build_log;
|
||||
tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable;
|
||||
tensorrt_options.trt_sparsity_enable = trt_sparsity_enable;
|
||||
tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level;
|
||||
tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams;
|
||||
tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str();
|
||||
session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options);
|
||||
|
||||
OrtCUDAProviderOptions cuda_options;
|
||||
|
|
@ -473,7 +517,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
} else if (key == "rpc_control_latency") {
|
||||
qnn_options[key] = value;
|
||||
} else {
|
||||
ORT_THROW(R"(Wrong key type entered. Choose from options:
|
||||
ORT_THROW(R"(Wrong key type entered. Choose from options:
|
||||
['backend_path', 'profiling_level', 'rpc_control_latency'])");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -685,7 +685,8 @@ TEST_P(ModelTest, Run) {
|
|||
if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) {
|
||||
OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30,
|
||||
1, // enable fp16
|
||||
0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0};
|
||||
0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, -1, nullptr};
|
||||
ortso.AppendExecutionProvider_TensorRT_V2(params);
|
||||
} else {
|
||||
OrtTensorRTProviderOptionsV2* ep_option = nullptr;
|
||||
|
|
|
|||
|
|
@ -156,7 +156,12 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0};
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
-1,
|
||||
nullptr};
|
||||
|
||||
params.trt_engine_cache_enable = 1;
|
||||
std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
|
||||
|
|
@ -230,7 +235,12 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0};
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
-1,
|
||||
nullptr};
|
||||
|
||||
params.trt_engine_cache_enable = 1;
|
||||
std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
|
||||
|
|
@ -397,7 +407,12 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0};
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
-1,
|
||||
nullptr};
|
||||
|
||||
if (cache_type.compare("engine") == 0) {
|
||||
|
||||
|
|
|
|||
|
|
@ -2496,9 +2496,16 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
|
|||
#endif
|
||||
|
||||
#ifdef USE_TENSORRT
|
||||
// Value-parameterized GoogleTest fixture: each instantiated test receives a single std::string parameter via GetParam().
class CApiTensorRTTest : public testing::Test, public ::testing::WithParamInterface<std::string> {};
|
||||
|
||||
// This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
|
||||
TEST(CApiTest, TestConfigureTensorRTProviderOptions) {
|
||||
TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
|
||||
std::string param = GetParam();
|
||||
size_t pos = param.find("=");
|
||||
std::string option_name = param.substr(0, pos);
|
||||
std::string option_value = param.substr(pos + 1);
|
||||
ASSERT_NE(pos, std::string::npos);
|
||||
|
||||
const auto& api = Ort::GetApi();
|
||||
OrtTensorRTProviderOptionsV2* trt_options;
|
||||
OrtAllocator* allocator;
|
||||
|
|
@ -2508,16 +2515,19 @@ TEST(CApiTest, TestConfigureTensorRTProviderOptions) {
|
|||
|
||||
const char* engine_cache_path = "./trt_engine_folder";
|
||||
|
||||
std::vector<const char*> keys{"device_id", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable", "trt_engine_cache_path"};
|
||||
std::vector<const char*> keys{"device_id", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable",
|
||||
"trt_engine_cache_path", option_name.c_str()};
|
||||
|
||||
std::vector<const char*> values{"0", "1", "0", "1", engine_cache_path};
|
||||
std::vector<const char*> values{"0", "1", "0", "1",
|
||||
engine_cache_path, option_value.c_str()};
|
||||
|
||||
ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), 5) == nullptr);
|
||||
ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), keys.size()) == nullptr);
|
||||
|
||||
ASSERT_TRUE(api.GetAllocatorWithDefaultOptions(&allocator) == nullptr);
|
||||
ASSERT_TRUE(api.GetTensorRTProviderOptionsAsString(rel_trt_options.get(), allocator, &trt_options_str) == nullptr);
|
||||
std::string s(trt_options_str);
|
||||
ASSERT_TRUE(s.find(engine_cache_path) != std::string::npos);
|
||||
ASSERT_TRUE(s.find(param.c_str()) != std::string::npos);
|
||||
ASSERT_TRUE(api.AllocatorFree(allocator, (void*)trt_options_str) == nullptr);
|
||||
|
||||
Ort::SessionOptions session_options;
|
||||
|
|
@ -2552,6 +2562,12 @@ TEST(CApiTest, TestConfigureTensorRTProviderOptions) {
|
|||
struct stat buffer;
|
||||
ASSERT_TRUE(stat(engine_cache_path, &buffer) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The CApiTensorRTTest suite can be used to test TRT options
|
||||
*/
|
||||
// Each value is one "option_name=option_value" string; the parameterized test splits it at the first '='.
INSTANTIATE_TEST_SUITE_P(CApiTensorRTTest, CApiTensorRTTest,
|
||||
::testing::Values("trt_build_heuristics_enable=1", "trt_sparsity_enable=1", "trt_builder_optimization_level=0", "trt_tactic_sources=-CUDNN,+CUBLAS", "trt_auxiliary_streams=2"));
|
||||
#endif
|
||||
|
||||
#ifdef USE_CUDA
|
||||
|
|
|
|||
Loading…
Reference in a new issue