diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 8f2b5af870..680ce1cc5b 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -25,13 +25,14 @@ struct OrtTensorRTProviderOptionsV2 { int trt_dla_core{0}; // DLA core number. Default 0 int trt_dump_subgraphs{0}; // dump TRT subgraph. Default 0 = false, nonzero = true int trt_engine_cache_enable{0}; // enable engine caching. Default 0 = false, nonzero = true - const char* trt_engine_cache_path{nullptr}; // specify engine cache path + const char* trt_engine_cache_path{nullptr}; // specify engine cache path, defaults to the working directory int trt_engine_decryption_enable{0}; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path{nullptr}; // specify engine decryption library path int trt_force_sequential_engine_build{0}; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true int trt_context_memory_sharing_enable{0}; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true int trt_layer_norm_fp32_fallback{0}; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true int trt_timing_cache_enable{0}; // enable TensorRT timing cache. Default 0 = false, nonzero = true + const char* trt_timing_cache_path{nullptr}; // specify timing cache path, if none is provided the trt_engine_cache_path is used int trt_force_timing_cache{0}; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true int trt_detailed_build_log{0}; // Enable detailed build step logging on TensorRT EP with timing for each engine build. 
Default 0 = false, nonzero = true int trt_build_heuristics_enable{0}; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index ef1f0bf9f8..a1fc67ff60 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -824,6 +824,14 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; } + // use a more global cache if given + if (timing_cache_enable_) { + if (!info.timing_cache_path.empty()) { + global_cache_path_ = info.timing_cache_path; + } else { + global_cache_path_ = cache_path_; + } + } engine_decryption_enable_ = info.engine_decryption_enable; if (engine_decryption_enable_) { engine_decryption_lib_path_ = info.engine_decryption_lib_path; @@ -928,6 +936,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! 
Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path"; } } + if (timing_cache_enable_) { + std::string timing_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCachePath); + // use a more global cache if given + if (!timing_cache_path.empty()) { + global_cache_path_ = timing_cache_path; + } else { + global_cache_path_ = cache_path_; + } + } const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable); if (!engine_decryption_enable_env.empty()) { @@ -1019,6 +1036,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv throw std::runtime_error("Failed to create directory " + cache_path_); } } + if (!global_cache_path_.empty() && !fs::is_directory(global_cache_path_)) { + if (!fs::create_directory(global_cache_path_)) { + throw std::runtime_error("Failed to create directory " + global_cache_path_); + } + } { auto lock = GetApiLock(); runtime_ = std::unique_ptr(nvinfer1::createInferRuntime(GetTensorrtLogger())); @@ -1104,6 +1126,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv << ", trt_dump_subgraphs: " << dump_subgraphs_ << ", trt_engine_cache_enable: " << engine_cache_enable_ << ", trt_cache_path: " << cache_path_ + << ", trt_global_cache_path: " << global_cache_path_ << ", trt_engine_decryption_enable: " << engine_decryption_enable_ << ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_ << ", trt_force_sequential_engine_build: " << force_sequential_engine_build_ @@ -2199,7 +2222,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectornode_name], context_memory_sharing_enable_, &max_ctx_mem_size_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_, - force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_, + global_cache_path_, force_timing_cache_match_, detailed_build_log_, 
build_heuristics_enable_, sparsity_enable_, builder_optimization_level_, auxiliary_streams_, !tactic_sources_.empty(), tactics}; *state = p.release(); return 0; @@ -2460,7 +2483,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector runtime_ = nullptr; OrtMutex tensorrt_mu_; int device_id_; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index cb7a568d09..3ead33f913 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -25,7 +25,7 @@ constexpr const char* kDLAEnable = "trt_dla_enable"; constexpr const char* kDLACore = "trt_dla_core"; constexpr const char* kDumpSubgraphs = "trt_dump_subgraphs"; constexpr const char* kEngineCacheEnable = "trt_engine_cache_enable"; -constexpr const char* kCachePath = "trt_engine_cache_path"; +constexpr const char* kEngineCachePath = "trt_engine_cache_path"; /* Identifier renamed from kCachePath; the key string trt_engine_cache_path itself is unchanged. NOTE(review): further down in this table the value of kForceTimingCacheMatch changes from trt_force_timing_cache_match to trt_force_timing_cache, so callers that still pass the old key string presumably stop matching - confirm that this key rename is intended. */ constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable"; constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path"; constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build"; @@ -33,7 +33,8 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable"; constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; -constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match"; +constexpr const char* kTimingCachePath = "trt_timing_cache_path"; +constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache"; constexpr const char* kDetailedBuildLog = "trt_detailed_build_log"; constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable"; constexpr const char* 
kSparsityEnable = "trt_sparsity_enable"; @@ -76,13 +77,14 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core) .AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs) .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path) + .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCachePath, info.engine_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable) + .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCachePath, info.timing_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache) .AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log) .AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable) @@ -115,7 +117,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.dla_core)}, {tensorrt::provider_option_names::kDumpSubgraphs, 
MakeStringWithClassicLocale(info.dump_subgraphs)}, {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.engine_cache_enable)}, - {tensorrt::provider_option_names::kCachePath, MakeStringWithClassicLocale(info.engine_cache_path)}, + {tensorrt::provider_option_names::kEngineCachePath, MakeStringWithClassicLocale(info.engine_cache_path)}, {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)}, {tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)}, {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)}, @@ -123,6 +125,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)}, {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)}, {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)}, + {tensorrt::provider_option_names::kTimingCachePath, MakeStringWithClassicLocale(info.timing_cache_path)}, {tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)}, {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)}, {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)}, @@ -142,7 +145,8 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) { auto empty_if_null = [](const char* s) { return s != nullptr ? 
std::string{s} : std::string{}; }; const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name); - const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path); + const std::string kEngineCachePath_ = empty_if_null(info.trt_engine_cache_path); + const std::string kTimingCachePath_ = empty_if_null(info.trt_timing_cache_path); const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources); const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path); const std::string kExtraPluginLibPaths_ = empty_if_null(info.trt_extra_plugin_lib_paths); @@ -164,13 +168,14 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.trt_dla_core)}, {tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.trt_dump_subgraphs)}, {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.trt_engine_cache_enable)}, - {tensorrt::provider_option_names::kCachePath, kCachePath_}, + {tensorrt::provider_option_names::kEngineCachePath, kEngineCachePath_}, {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)}, {tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_}, {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)}, {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)}, {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)}, {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)}, + {tensorrt::provider_option_names::kTimingCachePath, kTimingCachePath_}, 
{tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)}, {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)}, {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)}, @@ -204,6 +209,27 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options if (provider_options == nullptr) { return; } + /* Helper that yields the C-string pointer to store in a provider-options field for s_in. When string_copy is set: returns nullptr for an empty string, otherwise a new[]-allocated copy of s_in with an explicitly written NUL terminator (strncpy is given exactly str_size as its count). The copy is not freed here; the provider_bridge_ort.cc hunk of this diff delete[]s these fields in ReleaseTensorRTProviderOptions. When string_copy is not set: returns s_in.c_str() without allocating. NOTE(review): in that case the pointer aliases the buffer owned by s_in, and the call sites pass members of the local internal_options declared below - confirm the pointer is not used after this function returns. */ auto copy_string_if_needed = [&](std::string& s_in) { + if (string_copy) { + char* dest = nullptr; + auto str_size = s_in.size(); + if (str_size == 0) { + return (const char*)nullptr; /* empty string is represented as a null field */ + } else { + dest = new char[str_size + 1]; /* one extra byte for the terminator */ +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, s_in.c_str(), str_size); +#else + strncpy(dest, s_in.c_str(), str_size); +#endif + dest[str_size] = '\0'; + return (const char*)dest; + } + } else { + return s_in.c_str(); + } + }; + TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); auto& trt_provider_options_v2 = *reinterpret_cast(provider_options); trt_provider_options_v2.device_id = internal_options.device_id; @@ -220,24 +246,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable; trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable; - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.int8_calibration_table_name.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_int8_calibration_table_name = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size); -#else - strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size); -#endif - dest[str_size] = '\0'; - 
trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str(); - } + trt_provider_options_v2.trt_int8_calibration_table_name = copy_string_if_needed(internal_options.int8_calibration_table_name); trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table; trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable; @@ -245,45 +254,12 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs; trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable; - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.engine_cache_path.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_engine_cache_path = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_cache_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_engine_cache_path = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str(); - } + trt_provider_options_v2.trt_engine_cache_path = copy_string_if_needed(internal_options.engine_cache_path); + trt_provider_options_v2.trt_timing_cache_path = copy_string_if_needed(internal_options.timing_cache_path); trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable; - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.engine_decryption_lib_path.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef 
_MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str(); - } + trt_provider_options_v2.trt_engine_decryption_lib_path = copy_string_if_needed(internal_options.engine_decryption_lib_path); trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; @@ -296,100 +272,11 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level; trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams; - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.tactic_sources.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_tactic_sources = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); -#else - strncpy(dest, internal_options.tactic_sources.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_tactic_sources = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.extra_plugin_lib_paths.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, 
internal_options.extra_plugin_lib_paths.c_str(), str_size); -#else - strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_min_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_min_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_max_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_max_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str(); - } - - if (string_copy) { - char* dest = nullptr; - auto str_size = internal_options.profile_opt_shapes.size(); - if (str_size == 0) { - trt_provider_options_v2.trt_profile_opt_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size); -#else - 
strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest; - } - } else { - trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str(); - } + trt_provider_options_v2.trt_tactic_sources = copy_string_if_needed(internal_options.tactic_sources); + trt_provider_options_v2.trt_extra_plugin_lib_paths = copy_string_if_needed(internal_options.extra_plugin_lib_paths); + trt_provider_options_v2.trt_profile_min_shapes = copy_string_if_needed(internal_options.profile_min_shapes); + trt_provider_options_v2.trt_profile_max_shapes = copy_string_if_needed(internal_options.profile_max_shapes); + trt_provider_options_v2.trt_profile_opt_shapes = copy_string_if_needed(internal_options.profile_opt_shapes); trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable; } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 61a6bf0821..b16543aa3d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -38,6 +38,7 @@ struct TensorrtExecutionProviderInfo { bool context_memory_sharing_enable{false}; bool layer_norm_fp32_fallback{false}; bool timing_cache_enable{false}; + std::string timing_cache_path{""}; bool force_timing_cache{false}; bool detailed_build_log{false}; bool build_heuristics_enable{false}; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index d7e13df000..426584553f 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -103,6 +103,7 @@ struct Tensorrt_Provider : Provider { 
info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; info.timing_cache_enable = options.trt_timing_cache_enable != 0; + info.timing_cache_path = options.trt_timing_cache_path == nullptr ? "" : options.trt_timing_cache_path; info.force_timing_cache = options.trt_force_timing_cache != 0; info.detailed_build_log = options.trt_detailed_build_log != 0; info.build_heuristics_enable = options.trt_build_heuristics_enable != 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d307f79c37..9e59883478 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1931,6 +1931,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor if (ptr != nullptr) { delete[] ptr->trt_int8_calibration_table_name; delete[] ptr->trt_engine_cache_path; + delete[] ptr->trt_timing_cache_path; delete[] ptr->trt_engine_decryption_lib_path; delete[] ptr->trt_tactic_sources; delete[] ptr->trt_extra_plugin_lib_paths; diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 90271b5458..7faca3b468 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -479,7 +479,7 @@ std::unique_ptr CreateExecutionProviderInstance( // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance. // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance // and TRT EP instance, so it won't be released.) 
- std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; + std::string calibration_table, cache_path, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { OrtTensorRTProviderOptionsV2 params; @@ -623,6 +623,13 @@ std::unique_ptr CreateExecutionProviderInstance( } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be 'True' or 'False'. Default value is 'False'.\n"); } + } else if (option.first == "trt_timing_cache_path") { + if (!option.second.empty()) { + timing_cache_path = option.second; + params.trt_timing_cache_path = timing_cache_path.c_str(); + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_path' should be a path string i.e. 'cache_folder/'.\n"); + } } else if (option.first == "trt_force_timing_cache") { if (option.second == "True" || option.second == "true") { params.trt_force_timing_cache = true; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a7f0b7584a..e828a7cee5 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "core/session/onnxruntime_session_options_config_keys.h" @@ -100,36 +101,28 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device const auto& api = Ort::GetApi(); OrtCUDAProviderOptionsV2* cuda_options; Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options)); - - const char* cudnn_conv_algo_search = "cudnn_conv_algo_search"; - const char* default_conv = "DEFAULT"; - const char* benchmarking = "EXHAUSTIVE"; - const char* heuristic = "HEURISTIC"; + std::vector option_keys, option_values; + // used to keep all 
option keys and value strings alive + std::list buffer; + buffer.emplace_back("cudnn_conv_algo_search"); + option_keys.push_back(buffer.back().c_str()); switch (performance_test_config.run_config.cudnn_conv_algo) { case 0: - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &benchmarking, 1)); + buffer.emplace_back("EXHAUSTIVE"); break; case 1: - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &heuristic, 1)); + buffer.emplace_back("HEURISTIC"); break; default: - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &default_conv, 1)); + buffer.emplace_back("DEFAULT"); break; } + option_values.push_back(buffer.back().c_str()); - const char* do_copy_in_default_stream = "do_copy_in_default_stream"; - if (performance_test_config.run_config.do_cuda_copy_in_separate_stream) { - const char* v = "1"; - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1)); - } else { - const char* v = "0"; - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1)); - } + buffer.emplace_back("do_copy_in_default_stream"); + option_keys.push_back(buffer.back().c_str()); + buffer.emplace_back(performance_test_config.run_config.do_cuda_copy_in_separate_stream ? 
"1" : "0"); + option_values.push_back(buffer.back().c_str()); #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -148,51 +141,34 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device "[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); } - auto key = token.substr(0, pos); - auto value = token.substr(pos + 1); - auto key_p = key.c_str(); - auto value_p = value.c_str(); - Ort::ThrowOnError( - api.UpdateCUDAProviderOptions(cuda_options, &key_p, &value_p, 1)); + buffer.emplace_back(token.substr(0, pos)); + option_keys.push_back(buffer.back().c_str()); + buffer.emplace_back(token.substr(pos + 1)); + option_values.push_back(buffer.back().c_str()); } + Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options, + option_keys.data(), option_values.data(), option_keys.size())); + if (!status.IsOK()) { + OrtAllocator* allocator; + char* options; + Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); + Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options)); + ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(), + "\nSupported options are:\n", options); + } session_options.AppendExecutionProvider_CUDA_V2(*cuda_options); #else ORT_THROW("CUDA is not supported in this build\n"); #endif } else if (provider_name == onnxruntime::kTensorrtExecutionProvider) { #ifdef USE_TENSORRT - int device_id = 0; - int trt_max_partition_iterations = 1000; - int trt_min_subgraph_size = 1; - size_t trt_max_workspace_size = 1 << 30; - bool trt_fp16_enable = false; - bool trt_int8_enable = false; - std::string trt_int8_calibration_table_name = ""; - bool trt_int8_use_native_calibration_table = false; - bool trt_dla_enable = false; - int trt_dla_core = 0; - bool trt_dump_subgraphs = false; - bool trt_engine_cache_enable = false; - std::string 
trt_engine_cache_path = ""; - bool trt_engine_decryption_enable = false; - std::string trt_engine_decryption_lib_path = ""; - bool trt_force_sequential_engine_build = false; - bool trt_context_memory_sharing_enable = false; - bool trt_layer_norm_fp32_fallback = false; - bool trt_timing_cache_enable = false; - bool trt_force_timing_cache = false; - bool trt_detailed_build_log = false; - bool trt_build_heuristics_enable = false; - bool trt_sparsity_enable = false; - int trt_builder_optimization_level = 3; - int trt_auxiliary_streams = -1; - std::string trt_tactic_sources = ""; - std::string trt_extra_plugin_lib_paths = ""; - std::string trt_profile_min_shapes = ""; - std::string trt_profile_max_shapes = ""; - std::string trt_profile_opt_shapes = ""; - bool trt_cuda_graph_enable = false; + const auto& api = Ort::GetApi(); + OrtTensorRTProviderOptionsV2* tensorrt_options; + Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options)); + std::vector option_keys, option_values; + // used to keep all option keys and value strings alive + std::list buffer; #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -207,272 +183,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } auto pos = token.find("|"); if (pos == std::string::npos || pos == 0 || pos == token.length()) { - ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); + ORT_THROW( + "[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); } - auto key = token.substr(0, pos); - auto value = token.substr(pos + 1); - if (key == "device_id") { - if (!value.empty()) { - device_id = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number.\n"); - } - } else if (key == "trt_max_partition_iterations") { - if (!value.empty()) { - 
trt_max_partition_iterations = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a number.\n"); - } - } else if (key == "trt_min_subgraph_size") { - if (!value.empty()) { - trt_min_subgraph_size = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a number.\n"); - } - } else if (key == "trt_max_workspace_size") { - if (!value.empty()) { - trt_max_workspace_size = std::stoull(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n"); - } - } else if (key == "trt_fp16_enable") { - if (value == "true" || value == "True") { - trt_fp16_enable = true; - } else if (value == "false" || value == "False") { - trt_fp16_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_int8_enable") { - if (value == "true" || value == "True") { - trt_int8_enable = true; - } else if (value == "false" || value == "False") { - trt_int8_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_int8_calibration_table_name") { - if (!value.empty()) { - trt_int8_calibration_table_name = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n"); - } - } else if (key == "trt_int8_use_native_calibration_table") { - if (value == "true" || value == "True") { - trt_int8_use_native_calibration_table = true; - } else if (value == "false" || value == "False") { - trt_int8_use_native_calibration_table = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. 
true or false. Default value is false.\n"); - } - } else if (key == "trt_dla_enable") { - if (value == "true" || value == "True") { - trt_dla_enable = true; - } else if (value == "false" || value == "False") { - trt_dla_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_dla_core") { - if (!value.empty()) { - trt_dla_core = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a number.\n"); - } - } else if (key == "trt_dump_subgraphs") { - if (value == "true" || value == "True") { - trt_dump_subgraphs = true; - } else if (value == "false" || value == "False") { - trt_dump_subgraphs = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_engine_cache_enable") { - if (value == "true" || value == "True") { - trt_engine_cache_enable = true; - } else if (value == "false" || value == "False") { - trt_engine_cache_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_engine_cache_path") { - if (!value.empty()) { - trt_engine_cache_path = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a non-empty string.\n"); - } - } else if (key == "trt_engine_decryption_enable") { - if (value == "true" || value == "True") { - trt_engine_decryption_enable = true; - } else if (value == "false" || value == "False") { - trt_engine_decryption_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. true or false. 
Default value is false.\n"); - } - } else if (key == "trt_engine_decryption_lib_path") { - if (!value.empty()) { - trt_engine_decryption_lib_path = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a non-empty string.\n"); - } - } else if (key == "trt_force_sequential_engine_build") { - if (value == "true" || value == "True") { - trt_force_sequential_engine_build = true; - } else if (value == "false" || value == "False") { - trt_force_sequential_engine_build = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_context_memory_sharing_enable") { - if (value == "true" || value == "True") { - trt_context_memory_sharing_enable = true; - } else if (value == "false" || value == "False") { - trt_context_memory_sharing_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_context_memory_sharing_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_layer_norm_fp32_fallback") { - if (value == "true" || value == "True") { - trt_layer_norm_fp32_fallback = true; - } else if (value == "false" || value == "False") { - trt_layer_norm_fp32_fallback = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_layer_norm_fp32_fallback' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_timing_cache_enable") { - if (value == "true" || value == "True") { - trt_timing_cache_enable = true; - } else if (value == "false" || value == "False") { - trt_timing_cache_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. 
Default value is false.\n"); - } - } else if (key == "trt_force_timing_cache") { - if (value == "true" || value == "True") { - trt_force_timing_cache = true; - } else if (value == "false" || value == "False") { - trt_force_timing_cache = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_detailed_build_log") { - if (value == "true" || value == "True") { - trt_detailed_build_log = true; - } else if (value == "false" || value == "False") { - trt_detailed_build_log = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_build_heuristics_enable") { - if (value == "true" || value == "True") { - trt_build_heuristics_enable = true; - } else if (value == "false" || value == "False") { - trt_build_heuristics_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "trt_sparsity_enable") { - if (value == "true" || value == "True") { - trt_sparsity_enable = true; - } else if (value == "false" || value == "False") { - trt_sparsity_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. 
Default value is false.\n"); - } - } else if (key == "trt_builder_optimization_level") { - if (!value.empty()) { - trt_builder_optimization_level = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n"); - } - } else if (key == "trt_auxiliary_streams") { - if (!value.empty()) { - trt_auxiliary_streams = std::stoi(value); - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n"); - } - } else if (key == "trt_tactic_sources") { - if (!value.empty()) { - trt_tactic_sources = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-empty string.\n"); - } - } else if (key == "trt_extra_plugin_lib_paths") { - if (!value.empty()) { - trt_extra_plugin_lib_paths = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a non-empty string.\n"); - } - } else if (key == "trt_profile_min_shapes") { - if (!value.empty()) { - trt_profile_min_shapes = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_min_shapes' should be a non-empty string.\n"); - } - } else if (key == "trt_profile_max_shapes") { - if (!value.empty()) { - trt_profile_max_shapes = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_max_shapes' should be a non-empty string.\n"); - } - } else if (key == "trt_profile_opt_shapes") { - if (!value.empty()) { - trt_profile_opt_shapes = value; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_opt_shapes' should be a non-empty string.\n"); - } - } else if (key == "trt_cuda_graph_enable") { - if (value == "true" || value == "True") { - trt_cuda_graph_enable = true; - } else if (value == "false" || value == "False") { - trt_cuda_graph_enable = false; - } else { - ORT_THROW("[ERROR] [TensorRT] The value for the key 
'trt_cuda_graph_enable' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else { - ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback', 'trt_timing_cache_enable', 'trt_force_timing_cache', 'trt_detailed_build_log', 'trt_build_heuristics_enable', 'trt_sparsity_enable', 'trt_builder_optimization_level', 'trt_auxiliary_streams', 'trt_tactic_sources', 'trt_extra_plugin_lib_paths', 'trt_profile_min_shapes', 'trt_profile_max_shapes', 'trt_profile_opt_shapes', 'trt_cuda_graph_enable'] \n"); - } + buffer.emplace_back(token.substr(0, pos)); + option_keys.push_back(buffer.back().c_str()); + buffer.emplace_back(token.substr(pos + 1)); + option_values.push_back(buffer.back().c_str()); } - OrtTensorRTProviderOptionsV2 tensorrt_options; - tensorrt_options.device_id = device_id; - tensorrt_options.has_user_compute_stream = 0; - tensorrt_options.user_compute_stream = nullptr; - tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations; - tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size; - tensorrt_options.trt_max_workspace_size = trt_max_workspace_size; - tensorrt_options.trt_fp16_enable = trt_fp16_enable; - tensorrt_options.trt_int8_enable = trt_int8_enable; - tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str(); - tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table; - 
tensorrt_options.trt_dla_enable = trt_dla_enable; - tensorrt_options.trt_dla_core = trt_dla_core; - tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs; - tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable; - tensorrt_options.trt_engine_cache_path = trt_engine_cache_path.c_str(); - tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable; - tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str(); - tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build; - tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable; - tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback; - tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; - tensorrt_options.trt_force_timing_cache = trt_force_timing_cache; - tensorrt_options.trt_detailed_build_log = trt_detailed_build_log; - tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable; - tensorrt_options.trt_sparsity_enable = trt_sparsity_enable; - tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level; - tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams; - tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str(); - tensorrt_options.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str(); - tensorrt_options.trt_profile_min_shapes = trt_profile_min_shapes.c_str(); - tensorrt_options.trt_profile_max_shapes = trt_profile_max_shapes.c_str(); - tensorrt_options.trt_profile_opt_shapes = trt_profile_opt_shapes.c_str(); - tensorrt_options.trt_cuda_graph_enable = trt_cuda_graph_enable; - session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); + Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options, + option_keys.data(), option_values.data(), option_keys.size())); + if (!status.IsOK()) { + OrtAllocator* allocator; + char* options; + 
Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); + Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options)); + ORT_THROW("[ERROR] [TensorRT] Configuring the TensorRT options failed with message: ", status.GetErrorMessage(), + "\nSupported options are:\n", options); + } + + session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options); OrtCUDAProviderOptions cuda_options; - cuda_options.device_id = device_id; + cuda_options.device_id = tensorrt_options->device_id; cuda_options.cudnn_conv_algo_search = static_cast(performance_test_config.run_config.cudnn_conv_algo); cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; // TODO: Support arena configuration for users of perf test diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index aa96e15336..d9f917f6d1 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -590,6 +590,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { // uint64_t compilation_without_cache_ms, compilation_with_cache_ms; // First session is created with TRT EP with timing cache enabled + // Not specifying a trt_timing_cache_path will result in using the working directory params.trt_timing_cache_enable = 1; { // auto start = chrono::steady_clock::now();