Enable global TRT timing cache (#17865)

This change adds a new `trt_timing_cache_path` option. Internally it is stored as `global_cache_path_` and is resolved with a fall-through approach: 1. no path provided => working directory; 2. `trt_engine_cache_path` provided but no `trt_timing_cache_path` => `trt_engine_cache_path`; 3. `trt_timing_cache_path` provided => `trt_timing_cache_path` (if `trt_engine_cache_path` is not provided, it still defaults to the working directory). ### Motivation and Context A TRT timing cache can be reused across multiple models because it only holds kernel timings, and it is common for network "patterns" to be reused between models. This can reduce build times considerably. --------- Co-authored-by: Carson M <carson@pyke.io>
2026-07-17 18:40:28 +00:00 · 2023-10-27 18:23:19 +02:00 · 2023-10-27 18:23:19 +02:00 · 2eeafc37bc
commit 2eeafc37bc
parent 58f1d15d19
10 changed files with 138 additions and 478 deletions
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@ -25,13 +25,14 @@ struct OrtTensorRTProviderOptionsV2 {
  int trt_dla_core{0};                                   // DLA core number. Default 0
  int trt_dump_subgraphs{0};                             // dump TRT subgraph. Default 0 = false, nonzero = true
  int trt_engine_cache_enable{0};                        // enable engine caching. Default 0 = false, nonzero = true
-  const char* trt_engine_cache_path{nullptr};            // specify engine cache path
+  const char* trt_engine_cache_path{nullptr};            // specify engine cache path, defaults to the working directory
  int trt_engine_decryption_enable{0};                   // enable engine decryption. Default 0 = false, nonzero = true
  const char* trt_engine_decryption_lib_path{nullptr};   // specify engine decryption library path
  int trt_force_sequential_engine_build{0};              // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
  int trt_context_memory_sharing_enable{0};              // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
  int trt_layer_norm_fp32_fallback{0};                   // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
  int trt_timing_cache_enable{0};                        // enable TensorRT timing cache. Default 0 = false, nonzero = true
+  const char* trt_timing_cache_path{nullptr};            // specify timing cache path, if none is provided the trt_engine_cache_path is used
  int trt_force_timing_cache{0};                         // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
  int trt_detailed_build_log{0};                         // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
  int trt_build_heuristics_enable{0};                    // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@ -824,6 +824,14 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
    if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) {
      cache_path_ = info.engine_cache_path;
    }
+    // use a more global cache if given
+    if (timing_cache_enable_) {
+      if (!info.timing_cache_path.empty()) {
+        global_cache_path_ = info.timing_cache_path;
+      } else {
+        global_cache_path_ = cache_path_;
+      }
+    }
    engine_decryption_enable_ = info.engine_decryption_enable;
    if (engine_decryption_enable_) {
      engine_decryption_lib_path_ = info.engine_decryption_lib_path;
@ -928,6 +936,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
          LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path";
        }
      }
+      if (timing_cache_enable_) {
+        std::string timing_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCachePath);
+        // use a more global cache if given
+        if (!timing_cache_path.empty()) {
+          global_cache_path_ = timing_cache_path;
+        } else {
+          global_cache_path_ = cache_path_;
+        }
+      }

      const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
      if (!engine_decryption_enable_env.empty()) {
@ -1019,6 +1036,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
        throw std::runtime_error("Failed to create directory " + cache_path_);
      }
    }
+    if (!global_cache_path_.empty() && !fs::is_directory(global_cache_path_)) {
+      if (!fs::create_directory(global_cache_path_)) {
+        throw std::runtime_error("Failed to create directory " + global_cache_path_);
+      }
+    }
    {
      auto lock = GetApiLock();
      runtime_ = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger()));
@ -1104,6 +1126,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
                        << ", trt_dump_subgraphs: " << dump_subgraphs_
                        << ", trt_engine_cache_enable: " << engine_cache_enable_
                        << ", trt_cache_path: " << cache_path_
+                        << ", trt_global_cache_path: " << global_cache_path_
                        << ", trt_engine_decryption_enable: " << engine_decryption_enable_
                        << ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
                        << ", trt_force_sequential_engine_build: " << force_sequential_engine_build_
@ -2199,7 +2222,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
      std::string timing_cache_path = "";
      bool engine_update = false;
      if (timing_cache_enable_) {
-        timing_cache_path = GetTimingCachePath(cache_path_, prop);
+        timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
      }
      {
        // ifstream file check, engine serialization/deserialization and engine build are in critical section. It needs lock protection to prevent race condition when inferencing with multithreading.
@ -2398,7 +2421,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
            dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
            runtime_.get(), profiles_[context->node_name], context_memory_sharing_enable_, &max_ctx_mem_size_,
            dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_,
-            force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
+            global_cache_path_, force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
            builder_optimization_level_, auxiliary_streams_, !tactic_sources_.empty(), tactics};
      *state = p.release();
      return 0;
@ -2460,7 +2483,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
      const std::string profile_cache_path = cache_path + "_sm" + compute_capability + ".profile";
      std::string timing_cache_path = "";
      if (timing_cache_enable_) {
-        timing_cache_path = GetTimingCachePath(cache_path_, prop);
+        timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
      }

      // Load serialized engine
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@ -26,6 +26,8 @@ static const std::string kDLACore = "ORT_TENSORRT_DLA_CORE";
 static const std::string kDumpSubgraphs = "ORT_TENSORRT_DUMP_SUBGRAPHS";
 static const std::string kEngineCacheEnable = "ORT_TENSORRT_ENGINE_CACHE_ENABLE";
 static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH";
+// As a timing cache can be used across multiple ONNX files, it makes sense to have a separate cache path
+static const std::string kTimingCachePath = "ORT_TENSORRT_GLOBAL_CACHE_PATH";
 static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE";
 static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH";
 static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
@ -131,6 +133,7 @@ struct TensorrtFuncState {
  int (*engine_decryption)(const char*, char*, size_t*) = nullptr;
  int (*engine_encryption)(const char*, char*, size_t) = nullptr;
  bool timing_cache_enable = true;
+  std::string timing_cache_path;
  bool force_timing_cache = false;
  bool detailed_build_log = false;
  bool build_heuristics_enable = false;
@ -218,7 +221,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
  int builder_optimization_level_ = 3;
  int auxiliary_streams_ = -1;
  std::string tactic_sources_;
-  std::string cache_path_, engine_decryption_lib_path_;
+  std::string global_cache_path_, cache_path_, engine_decryption_lib_path_;
  std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
  OrtMutex tensorrt_mu_;
  int device_id_;
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
@ -25,7 +25,7 @@ constexpr const char* kDLAEnable = "trt_dla_enable";
 constexpr const char* kDLACore = "trt_dla_core";
 constexpr const char* kDumpSubgraphs = "trt_dump_subgraphs";
 constexpr const char* kEngineCacheEnable = "trt_engine_cache_enable";
-constexpr const char* kCachePath = "trt_engine_cache_path";
+constexpr const char* kEngineCachePath = "trt_engine_cache_path";
 constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable";
 constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path";
 constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build";
@ -33,7 +33,8 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine
 constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable";
 constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback";
 constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable";
-constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match";
+constexpr const char* kTimingCachePath = "trt_timing_cache_path";
+constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache";
 constexpr const char* kDetailedBuildLog = "trt_detailed_build_log";
 constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable";
 constexpr const char* kSparsityEnable = "trt_sparsity_enable";
@ -76,13 +77,14 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
          .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core)
          .AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
          .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable)
-          .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path)
+          .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCachePath, info.engine_cache_path)
          .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable)
          .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path)
          .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build)
          .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable)
          .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback)
          .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable)
+          .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCachePath, info.timing_cache_path)
          .AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache)
          .AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
          .AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable)
@ -115,7 +117,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
      {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.dla_core)},
      {tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
      {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.engine_cache_enable)},
-      {tensorrt::provider_option_names::kCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
+      {tensorrt::provider_option_names::kEngineCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
      {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)},
      {tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)},
      {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)},
@ -123,6 +125,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
      {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)},
      {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)},
      {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)},
+      {tensorrt::provider_option_names::kTimingCachePath, MakeStringWithClassicLocale(info.timing_cache_path)},
      {tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)},
      {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
      {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)},
@ -142,7 +145,8 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
 ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) {
  auto empty_if_null = [](const char* s) { return s != nullptr ? std::string{s} : std::string{}; };
  const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name);
-  const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path);
+  const std::string kEngineCachePath_ = empty_if_null(info.trt_engine_cache_path);
+  const std::string kTimingCachePath_ = empty_if_null(info.trt_timing_cache_path);
  const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources);
  const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path);
  const std::string kExtraPluginLibPaths_ = empty_if_null(info.trt_extra_plugin_lib_paths);
@ -164,13 +168,14 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
      {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.trt_dla_core)},
      {tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.trt_dump_subgraphs)},
      {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.trt_engine_cache_enable)},
-      {tensorrt::provider_option_names::kCachePath, kCachePath_},
+      {tensorrt::provider_option_names::kEngineCachePath, kEngineCachePath_},
      {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)},
      {tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_},
      {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)},
      {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)},
      {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)},
      {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)},
+      {tensorrt::provider_option_names::kTimingCachePath, kTimingCachePath_},
      {tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)},
      {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)},
      {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)},
@ -204,6 +209,27 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
  if (provider_options == nullptr) {
    return;
  }
+  auto copy_string_if_needed = [&](std::string& s_in) {
+    if (string_copy) {
+      char* dest = nullptr;
+      auto str_size = s_in.size();
+      if (str_size == 0) {
+        return (const char*)nullptr;
+      } else {
+        dest = new char[str_size + 1];
+#ifdef _MSC_VER
+        strncpy_s(dest, str_size + 1, s_in.c_str(), str_size);
+#else
+        strncpy(dest, s_in.c_str(), str_size);
+#endif
+        dest[str_size] = '\0';
+        return (const char*)dest;
+      }
+    } else {
+      return s_in.c_str();
+    }
+  };
+
  TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options);
  auto& trt_provider_options_v2 = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options);
  trt_provider_options_v2.device_id = internal_options.device_id;
@ -220,24 +246,7 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
  trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable;
  trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable;

-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.int8_calibration_table_name.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_int8_calibration_table_name = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str();
-  }
+  trt_provider_options_v2.trt_int8_calibration_table_name = copy_string_if_needed(internal_options.int8_calibration_table_name);

  trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
  trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable;
@ -245,45 +254,12 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
  trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs;
  trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable;

-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.engine_cache_path.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_engine_cache_path = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.engine_cache_path.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_engine_cache_path = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str();
-  }
+  trt_provider_options_v2.trt_engine_cache_path = copy_string_if_needed(internal_options.engine_cache_path);
+  trt_provider_options_v2.trt_timing_cache_path = copy_string_if_needed(internal_options.timing_cache_path);

  trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable;

-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.engine_decryption_lib_path.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str();
-  }
+  trt_provider_options_v2.trt_engine_decryption_lib_path = copy_string_if_needed(internal_options.engine_decryption_lib_path);

  trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build;
  trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable;
@ -296,100 +272,11 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
  trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level;
  trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams;

-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.tactic_sources.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_tactic_sources = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.tactic_sources.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_tactic_sources = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str();
-  }
-
-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.extra_plugin_lib_paths.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str();
-  }
-
-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.profile_min_shapes.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_profile_min_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str();
-  }
-
-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.profile_max_shapes.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_profile_max_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str();
-  }
-
-  if (string_copy) {
-    char* dest = nullptr;
-    auto str_size = internal_options.profile_opt_shapes.size();
-    if (str_size == 0) {
-      trt_provider_options_v2.trt_profile_opt_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest;
-    }
-  } else {
-    trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str();
-  }
+  trt_provider_options_v2.trt_tactic_sources = copy_string_if_needed(internal_options.tactic_sources);
+  trt_provider_options_v2.trt_extra_plugin_lib_paths = copy_string_if_needed(internal_options.extra_plugin_lib_paths);
+  trt_provider_options_v2.trt_profile_min_shapes = copy_string_if_needed(internal_options.profile_min_shapes);
+  trt_provider_options_v2.trt_profile_max_shapes = copy_string_if_needed(internal_options.profile_max_shapes);
+  trt_provider_options_v2.trt_profile_opt_shapes = copy_string_if_needed(internal_options.profile_opt_shapes);

  trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable;
 }
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
@ -38,6 +38,7 @@ struct TensorrtExecutionProviderInfo {
  bool context_memory_sharing_enable{false};
  bool layer_norm_fp32_fallback{false};
  bool timing_cache_enable{false};
+  std::string timing_cache_path{""};
  bool force_timing_cache{false};
  bool detailed_build_log{false};
  bool build_heuristics_enable{false};
--- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
@ -103,6 +103,7 @@ struct Tensorrt_Provider : Provider {
    info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0;
    info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0;
    info.timing_cache_enable = options.trt_timing_cache_enable != 0;
+    info.timing_cache_path = options.trt_timing_cache_path == nullptr ? "" : options.trt_timing_cache_path;
    info.force_timing_cache = options.trt_force_timing_cache != 0;
    info.detailed_build_log = options.trt_detailed_build_log != 0;
    info.build_heuristics_enable = options.trt_build_heuristics_enable != 0;
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@ -1931,6 +1931,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
  if (ptr != nullptr) {
    delete[] ptr->trt_int8_calibration_table_name;
    delete[] ptr->trt_engine_cache_path;
+    delete[] ptr->trt_timing_cache_path;
    delete[] ptr->trt_engine_decryption_lib_path;
    delete[] ptr->trt_tactic_sources;
    delete[] ptr->trt_extra_plugin_lib_paths;
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -479,7 +479,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
      // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance.
      // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance
      // and TRT EP instance, so it won't be released.)
-      std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
+      std::string calibration_table, cache_path, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
      auto it = provider_options_map.find(type);
      if (it != provider_options_map.end()) {
        OrtTensorRTProviderOptionsV2 params;
@ -623,6 +623,13 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
            } else {
              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be 'True' or 'False'. Default value is 'False'.\n");
            }
+          } else if (option.first == "trt_timing_cache_path") {
+            if (!option.second.empty()) {
+              timing_cache_path = option.second;
+              params.trt_timing_cache_path = timing_cache_path.c_str();
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_path' should be a path string i.e. 'cache_folder/'.\n");
+            }
          } else if (option.first == "trt_force_timing_cache") {
            if (option.second == "True" || option.second == "true") {
              params.trt_force_timing_cache = true;
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@ -6,6 +6,7 @@
 #include <algorithm>
 #include <limits>
 #include <set>
+#include <list>
 #include <type_traits>
 #include <core/session/onnxruntime_cxx_api.h>
 #include "core/session/onnxruntime_session_options_config_keys.h"
@ -100,36 +101,28 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
    const auto& api = Ort::GetApi();
    OrtCUDAProviderOptionsV2* cuda_options;
    Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
-
-    const char* cudnn_conv_algo_search = "cudnn_conv_algo_search";
-    const char* default_conv = "DEFAULT";
-    const char* benchmarking = "EXHAUSTIVE";
-    const char* heuristic = "HEURISTIC";
+    std::vector<const char*> option_keys, option_values;
+    // used to keep all option keys and value strings alive
+    std::list<std::string> buffer;
+    buffer.emplace_back("cudnn_conv_algo_search");
+    option_keys.push_back(buffer.back().c_str());
    switch (performance_test_config.run_config.cudnn_conv_algo) {
      case 0:
-        Ort::ThrowOnError(
-            api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &benchmarking, 1));
+        buffer.emplace_back("EXHAUSTIVE");
        break;
      case 1:
-        Ort::ThrowOnError(
-            api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &heuristic, 1));
+        buffer.emplace_back("HEURISTIC");
        break;
      default:
-        Ort::ThrowOnError(
-            api.UpdateCUDAProviderOptions(cuda_options, &cudnn_conv_algo_search, &default_conv, 1));
+        buffer.emplace_back("DEFAULT");
        break;
    }
+    option_values.push_back(buffer.back().c_str());

-    const char* do_copy_in_default_stream = "do_copy_in_default_stream";
-    if (performance_test_config.run_config.do_cuda_copy_in_separate_stream) {
-      const char* v = "1";
-      Ort::ThrowOnError(
-          api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
-    } else {
-      const char* v = "0";
-      Ort::ThrowOnError(
-          api.UpdateCUDAProviderOptions(cuda_options, &do_copy_in_default_stream, &v, 1));
-    }
+    buffer.emplace_back("do_copy_in_default_stream");
+    option_keys.push_back(buffer.back().c_str());
+    buffer.emplace_back(performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0");
+    option_values.push_back(buffer.back().c_str());

 #ifdef _MSC_VER
    std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
@ -148,51 +141,34 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
            "[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
      }

-      auto key = token.substr(0, pos);
-      auto value = token.substr(pos + 1);
-      auto key_p = key.c_str();
-      auto value_p = value.c_str();
-      Ort::ThrowOnError(
-          api.UpdateCUDAProviderOptions(cuda_options, &key_p, &value_p, 1));
+      buffer.emplace_back(token.substr(0, pos));
+      option_keys.push_back(buffer.back().c_str());
+      buffer.emplace_back(token.substr(pos + 1));
+      option_values.push_back(buffer.back().c_str());
    }

+    Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options,
+                                                     option_keys.data(), option_values.data(), option_keys.size()));
+    if (!status.IsOK()) {
+      OrtAllocator* allocator;
+      char* options;
+      Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
+      Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options));
+      ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
+                "\nSupported options are:\n", options);
+    }
    session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
 #else
    ORT_THROW("CUDA is not supported in this build\n");
 #endif
  } else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-    int device_id = 0;
-    int trt_max_partition_iterations = 1000;
-    int trt_min_subgraph_size = 1;
-    size_t trt_max_workspace_size = 1 << 30;
-    bool trt_fp16_enable = false;
-    bool trt_int8_enable = false;
-    std::string trt_int8_calibration_table_name = "";
-    bool trt_int8_use_native_calibration_table = false;
-    bool trt_dla_enable = false;
-    int trt_dla_core = 0;
-    bool trt_dump_subgraphs = false;
-    bool trt_engine_cache_enable = false;
-    std::string trt_engine_cache_path = "";
-    bool trt_engine_decryption_enable = false;
-    std::string trt_engine_decryption_lib_path = "";
-    bool trt_force_sequential_engine_build = false;
-    bool trt_context_memory_sharing_enable = false;
-    bool trt_layer_norm_fp32_fallback = false;
-    bool trt_timing_cache_enable = false;
-    bool trt_force_timing_cache = false;
-    bool trt_detailed_build_log = false;
-    bool trt_build_heuristics_enable = false;
-    bool trt_sparsity_enable = false;
-    int trt_builder_optimization_level = 3;
-    int trt_auxiliary_streams = -1;
-    std::string trt_tactic_sources = "";
-    std::string trt_extra_plugin_lib_paths = "";
-    std::string trt_profile_min_shapes = "";
-    std::string trt_profile_max_shapes = "";
-    std::string trt_profile_opt_shapes = "";
-    bool trt_cuda_graph_enable = false;
+    const auto& api = Ort::GetApi();
+    OrtTensorRTProviderOptionsV2* tensorrt_options;
+    Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
+    std::vector<const char*> option_keys, option_values;
+    // used to keep all option keys and value strings alive
+    std::list<std::string> buffer;

 #ifdef _MSC_VER
    std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
@ -207,272 +183,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
      }
      auto pos = token.find("|");
      if (pos == std::string::npos || pos == 0 || pos == token.length()) {
-        ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
+        ORT_THROW(
+            "[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
      }

-      auto key = token.substr(0, pos);
-      auto value = token.substr(pos + 1);
-      if (key == "device_id") {
-        if (!value.empty()) {
-          device_id = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number.\n");
-        }
-      } else if (key == "trt_max_partition_iterations") {
-        if (!value.empty()) {
-          trt_max_partition_iterations = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a number.\n");
-        }
-      } else if (key == "trt_min_subgraph_size") {
-        if (!value.empty()) {
-          trt_min_subgraph_size = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a number.\n");
-        }
-      } else if (key == "trt_max_workspace_size") {
-        if (!value.empty()) {
-          trt_max_workspace_size = std::stoull(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
-        }
-      } else if (key == "trt_fp16_enable") {
-        if (value == "true" || value == "True") {
-          trt_fp16_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_fp16_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_int8_enable") {
-        if (value == "true" || value == "True") {
-          trt_int8_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_int8_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_int8_calibration_table_name") {
-        if (!value.empty()) {
-          trt_int8_calibration_table_name = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_int8_use_native_calibration_table") {
-        if (value == "true" || value == "True") {
-          trt_int8_use_native_calibration_table = true;
-        } else if (value == "false" || value == "False") {
-          trt_int8_use_native_calibration_table = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_dla_enable") {
-        if (value == "true" || value == "True") {
-          trt_dla_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_dla_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_dla_core") {
-        if (!value.empty()) {
-          trt_dla_core = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a number.\n");
-        }
-      } else if (key == "trt_dump_subgraphs") {
-        if (value == "true" || value == "True") {
-          trt_dump_subgraphs = true;
-        } else if (value == "false" || value == "False") {
-          trt_dump_subgraphs = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_cache_enable") {
-        if (value == "true" || value == "True") {
-          trt_engine_cache_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_engine_cache_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_cache_path") {
-        if (!value.empty()) {
-          trt_engine_cache_path = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_engine_decryption_enable") {
-        if (value == "true" || value == "True") {
-          trt_engine_decryption_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_engine_decryption_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_decryption_lib_path") {
-        if (!value.empty()) {
-          trt_engine_decryption_lib_path = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_force_sequential_engine_build") {
-        if (value == "true" || value == "True") {
-          trt_force_sequential_engine_build = true;
-        } else if (value == "false" || value == "False") {
-          trt_force_sequential_engine_build = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_context_memory_sharing_enable") {
-        if (value == "true" || value == "True") {
-          trt_context_memory_sharing_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_context_memory_sharing_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_context_memory_sharing_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_layer_norm_fp32_fallback") {
-        if (value == "true" || value == "True") {
-          trt_layer_norm_fp32_fallback = true;
-        } else if (value == "false" || value == "False") {
-          trt_layer_norm_fp32_fallback = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_layer_norm_fp32_fallback' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_timing_cache_enable") {
-        if (value == "true" || value == "True") {
-          trt_timing_cache_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_timing_cache_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_force_timing_cache") {
-        if (value == "true" || value == "True") {
-          trt_force_timing_cache = true;
-        } else if (value == "false" || value == "False") {
-          trt_force_timing_cache = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_detailed_build_log") {
-        if (value == "true" || value == "True") {
-          trt_detailed_build_log = true;
-        } else if (value == "false" || value == "False") {
-          trt_detailed_build_log = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_build_heuristics_enable") {
-        if (value == "true" || value == "True") {
-          trt_build_heuristics_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_build_heuristics_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_sparsity_enable") {
-        if (value == "true" || value == "True") {
-          trt_sparsity_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_sparsity_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_builder_optimization_level") {
-        if (!value.empty()) {
-          trt_builder_optimization_level = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n");
-        }
-      } else if (key == "trt_auxiliary_streams") {
-        if (!value.empty()) {
-          trt_auxiliary_streams = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n");
-        }
-      } else if (key == "trt_tactic_sources") {
-        if (!value.empty()) {
-          trt_tactic_sources = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_extra_plugin_lib_paths") {
-        if (!value.empty()) {
-          trt_extra_plugin_lib_paths = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_min_shapes") {
-        if (!value.empty()) {
-          trt_profile_min_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_min_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_max_shapes") {
-        if (!value.empty()) {
-          trt_profile_max_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_max_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_opt_shapes") {
-        if (!value.empty()) {
-          trt_profile_opt_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_opt_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_cuda_graph_enable") {
-        if (value == "true" || value == "True") {
-          trt_cuda_graph_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_cuda_graph_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_cuda_graph_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else {
-        ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback', 'trt_timing_cache_enable', 'trt_force_timing_cache', 'trt_detailed_build_log', 'trt_build_heuristics_enable', 'trt_sparsity_enable', 'trt_builder_optimization_level', 'trt_auxiliary_streams', 'trt_tactic_sources', 'trt_extra_plugin_lib_paths', 'trt_profile_min_shapes', 'trt_profile_max_shapes', 'trt_profile_opt_shapes', 'trt_cuda_graph_enable'] \n");
-      }
+      buffer.emplace_back(token.substr(0, pos));
+      option_keys.push_back(buffer.back().c_str());
+      buffer.emplace_back(token.substr(pos + 1));
+      option_values.push_back(buffer.back().c_str());
    }
-    OrtTensorRTProviderOptionsV2 tensorrt_options;
-    tensorrt_options.device_id = device_id;
-    tensorrt_options.has_user_compute_stream = 0;
-    tensorrt_options.user_compute_stream = nullptr;
-    tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations;
-    tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size;
-    tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
-    tensorrt_options.trt_fp16_enable = trt_fp16_enable;
-    tensorrt_options.trt_int8_enable = trt_int8_enable;
-    tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
-    tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
-    tensorrt_options.trt_dla_enable = trt_dla_enable;
-    tensorrt_options.trt_dla_core = trt_dla_core;
-    tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs;
-    tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable;
-    tensorrt_options.trt_engine_cache_path = trt_engine_cache_path.c_str();
-    tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable;
-    tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str();
-    tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build;
-    tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable;
-    tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback;
-    tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable;
-    tensorrt_options.trt_force_timing_cache = trt_force_timing_cache;
-    tensorrt_options.trt_detailed_build_log = trt_detailed_build_log;
-    tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable;
-    tensorrt_options.trt_sparsity_enable = trt_sparsity_enable;
-    tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level;
-    tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams;
-    tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str();
-    tensorrt_options.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str();
-    tensorrt_options.trt_profile_min_shapes = trt_profile_min_shapes.c_str();
-    tensorrt_options.trt_profile_max_shapes = trt_profile_max_shapes.c_str();
-    tensorrt_options.trt_profile_opt_shapes = trt_profile_opt_shapes.c_str();
-    tensorrt_options.trt_cuda_graph_enable = trt_cuda_graph_enable;

-    session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options);
+    Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options,
+                                                         option_keys.data(), option_values.data(), option_keys.size()));
+    if (!status.IsOK()) {  // on failure, append the serialized TensorRT provider options to the error
+      OrtAllocator* allocator;
+      char* options;
+      Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
+      Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options));
+      ORT_THROW("[ERROR] [TensorRT] Configuring the TensorRT options failed with message: ", status.GetErrorMessage(),
+                "\nSupported options are:\n", options);
+    }
+
+    session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);

    OrtCUDAProviderOptions cuda_options;
-    cuda_options.device_id = device_id;
+    cuda_options.device_id = tensorrt_options->device_id;
    cuda_options.cudnn_conv_algo_search = static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo);
    cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
    // TODO: Support arena configuration for users of perf test
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@ -590,6 +590,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
    // uint64_t compilation_without_cache_ms, compilation_with_cache_ms;

    // First session is created with TRT EP with timing cache enabled
+    // Not specifying a trt_timing_cache_path will result in using the working directory
    params.trt_timing_cache_enable = 1;
    {
      // auto start = chrono::steady_clock::now();