diff --git a/include/onnxruntime/core/providers/cuda/cuda_provider_options.h b/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
index 084e66fd81..eaf0e5337b 100644
--- a/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
+++ b/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
@@ -29,6 +29,7 @@ struct OrtCUDAProviderOptionsV2 {
   int cudnn_conv1d_pad_to_nc1d = 0;                                                                            // flag specifying if pad Conv1D's input [N,C,D] to [N,C,1,D] or [N,C,D,1].
   int tunable_op_enable = 0;                                                                                   // flag specifying if TunableOp is enabled.
   int tunable_op_tuning_enable = 0;                                                                            // flag specifying if TunableOp is enabled for tuning, this relies on TunableOp is enabled.
+  int tunable_op_max_tuning_duration_ms = 0;                                                                   // Max tuning duration time limit for TunableOp.
   int enable_skip_layer_norm_strict_mode = 0;                                                                  // flag specifying if SkipLayerNorm is in strict mode. If true, use LayerNormalization kernel.
                                                                                                                // The strict mode has better accuracy but lower performance.
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 0be5f5e8b4..3c6df85c04 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -406,7 +406,8 @@ typedef struct OrtCUDAProviderOptions {
         user_compute_stream{},
         default_memory_arena_cfg{},
         tunable_op_enable{false},
-        tunable_op_tuning_enable{false} {}
+        tunable_op_tuning_enable{false},
+        tunable_op_max_tuning_duration_ms{} {}
 #endif
 
   /** \brief CUDA device Id
@@ -469,6 +470,11 @@ typedef struct OrtCUDAProviderOptions {
    */
   int tunable_op_tuning_enable;
 
+  /** \brief Max tuning duration time limit for each instance of TunableOp.
+   *   Defaults to 0 to disable the limit.
+   */
+  int tunable_op_max_tuning_duration_ms;
+
 } OrtCUDAProviderOptions;
 
 /** \brief ROCM Provider Options
@@ -487,7 +493,8 @@ typedef struct OrtROCMProviderOptions {
         user_compute_stream{},
         default_memory_arena_cfg{},
         tunable_op_enable{false},
-        tunable_op_tuning_enable{false} {}
+        tunable_op_tuning_enable{false},
+        tunable_op_max_tuning_duration_ms{} {}
 #endif
 
   /** \brief ROCM device Id
@@ -549,6 +556,11 @@ typedef struct OrtROCMProviderOptions {
    */
   int tunable_op_tuning_enable;
 
+  /** \brief Max tuning duration time limit for each instance of TunableOp.
+   *   Defaults to 0 to disable the limit.
+   */
+  int tunable_op_max_tuning_duration_ms;
+
 } OrtROCMProviderOptions;
 
 /** \brief TensorRT Provider Options
diff --git a/onnxruntime/core/framework/tunable.h b/onnxruntime/core/framework/tunable.h
index 7c4467d348..3c3e957f75 100644
--- a/onnxruntime/core/framework/tunable.h
+++ b/onnxruntime/core/framework/tunable.h
@@ -209,14 +209,13 @@ class TunableOp {
 
  private:
   static void WarmUp(Op<ParamsT>& op, const ParamsT* param) {
-    constexpr const int num_iter = 4;
+    constexpr const int num_iter = 1;
     for (int i = 0; i < num_iter; i++) {
       ORT_THROW_IF_ERROR(op(param));
     }
   }
 
-  static double Profile(Op<ParamsT>& op, const ParamsT* param) {
-    constexpr const int num_iter = 100;
+  static double Profile(Op<ParamsT>& op, const ParamsT* param, int num_iter) {
     TimerT timer{param->Stream()};
     timer.Start();
     for (int i = 0; i < num_iter; i++) {
@@ -242,12 +241,16 @@ class TunableOp {
   }
 
   int FindFastestImpl(const ParamsT* params, const std::vector<Op<ParamsT>>& candidates) {
+    ITuningContext* ctx = params->TuningContext();
     auto op_sig = Signature();
     auto param_sig = params->Signature();
     LOGS_DEFAULT(VERBOSE) << "FindFastestImpl for " << op_sig << '(' << param_sig << ')';
     auto min_time = std::numeric_limits<double>::infinity();
     int id = -1;
 
+    constexpr const int max_tuning_iter = 100;
+    constexpr const int approx_num_iter = 3;
+
     for (size_t i = 0; i < candidates.size(); i++) {
       auto& candidate = const_cast<Op<ParamsT>&>(candidates[i]);
       if (!IsSupported(candidate, params)) {
@@ -256,7 +259,13 @@ class TunableOp {
       }
 
       WarmUp(candidate, params);
-      auto time = Profile(candidate, params);
+
+      auto approx_duration = Profile(candidate, params, approx_num_iter);
+      int tuning_iter = std::max(1, int(std::min(double(max_tuning_iter), ctx->GetMaxTuningDurationMs() / approx_duration)));
+
+      LOGS_DEFAULT(VERBOSE) << "FindFastestImpl run instance " << op_sig << '(' << param_sig << ") id=" << i << " " << tuning_iter << " times.";
+
+      auto time = Profile(candidate, params, tuning_iter);
       if (time < min_time) {
         min_time = time;
         id = static_cast<int>(i);
diff --git a/onnxruntime/core/framework/tuning_context.h b/onnxruntime/core/framework/tuning_context.h
index 3fea4cb85f..5c5c595601 100644
--- a/onnxruntime/core/framework/tuning_context.h
+++ b/onnxruntime/core/framework/tuning_context.h
@@ -28,6 +28,8 @@ class ITuningContext {
   virtual void DisableTuning() = 0;
   virtual bool IsTuningEnabled() const = 0;
 
+  virtual void SetMaxTuningDurationMs(int max_duration_ms) = 0;
+  virtual int GetMaxTuningDurationMs() const = 0;
   virtual void EnableTunableOpAndTuning() final {
     EnableTunableOp();
     EnableTuning();
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
index bb840e9de2..e713dfc132 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
@@ -26,6 +26,7 @@ constexpr const char* kEnableCudaGraph = "enable_cuda_graph";
 constexpr const char* kCudnnConv1dPadToNc1d = "cudnn_conv1d_pad_to_nc1d";
 constexpr const char* kTunableOpEnable = "tunable_op_enable";
 constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable";
+constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms";
 constexpr const char* kEnableSkipLayerNormStrictMode = "enable_skip_layer_norm_strict_mode";
 }  // namespace provider_option_names
 }  // namespace cuda
@@ -108,6 +109,12 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
                 ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.tunable_op.tuning_enable));
                 return Status::OK();
               })
+          .AddValueParser(
+              cuda::provider_option_names::kTunableOpMaxTuningDurationMs,
+              [&info](const std::string& value_str) -> Status {
+                ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.tunable_op.max_tuning_duration_ms));
+                return Status::OK();
+              })
           .Parse(options));
 
   CUDAExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache};
@@ -132,6 +139,7 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const CUDAExecution
       {cuda::provider_option_names::kCudnnConv1dPadToNc1d, MakeStringWithClassicLocale(info.cudnn_conv1d_pad_to_nc1d)},
       {cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op.enable)},
       {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)},
+      {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)},
       {cuda::provider_option_names::kEnableSkipLayerNormStrictMode, MakeStringWithClassicLocale(info.enable_skip_layer_norm_strict_mode)},
   };
 
@@ -149,6 +157,7 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const OrtCUDAProvid
       {cuda::provider_option_names::kCudnnConv1dPadToNc1d, MakeStringWithClassicLocale(info.cudnn_conv1d_pad_to_nc1d)},
       {cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)},
       {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)},
+      {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)},
   };
 
   return options;
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
index c268518974..789b02b0e1 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@@ -40,6 +40,7 @@ namespace cuda {
 struct TunableOpInfo {
   bool enable{false};
   bool tuning_enable{false};
+  int max_tuning_duration_ms{};
 };
 }  // namespace cuda
 
@@ -83,6 +84,7 @@ struct std::hash<::onnxruntime::cuda::TunableOpInfo> {
     size_t seed_and_value{0xbc9f1d34};
     onnxruntime::HashCombine(info.enable, seed_and_value);
     onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
+    onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
     return seed_and_value;
   }
 };
diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
index 6e8c4d0ff0..d9fd8606e7 100644
--- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
+++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
@@ -252,6 +252,7 @@ struct CUDA_Provider : Provider {
     info.cudnn_conv1d_pad_to_nc1d = params->cudnn_conv1d_pad_to_nc1d != 0;
     info.tunable_op.enable = params->tunable_op_enable;
     info.tunable_op.tuning_enable = params->tunable_op_tuning_enable;
+    info.tunable_op.max_tuning_duration_ms = params->tunable_op_max_tuning_duration_ms;
     info.enable_skip_layer_norm_strict_mode = params->enable_skip_layer_norm_strict_mode != 0;
 
     return std::make_shared<CUDAProviderFactory>(info);
diff --git a/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.cc b/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.cc
index 4e302a2c66..b427b53fb6 100644
--- a/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.cc
+++ b/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.cc
@@ -77,6 +77,14 @@ bool CudaTuningContext::IsTuningEnabled() const {
   return info_->tuning_enable;
 }
 
+void CudaTuningContext::SetMaxTuningDurationMs(int max_duration_ms) {
+  info_->max_tuning_duration_ms = max_duration_ms;
+}
+
+int CudaTuningContext::GetMaxTuningDurationMs() const {
+  return info_->max_tuning_duration_ms > 0 ? info_->max_tuning_duration_ms : std::numeric_limits<int>::max();
+}
+
 TuningResultsManager& CudaTuningContext::GetTuningResultsManager() {
   return manager_;
 }
diff --git a/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.h b/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.h
index ec961890e6..2184a5d60b 100644
--- a/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.h
+++ b/onnxruntime/core/providers/cuda/tunable/cuda_tuning_context.h
@@ -39,6 +39,9 @@ class CudaTuningContext : public ITuningContext {
   void DisableTuning() override;
   bool IsTuningEnabled() const override;
 
+  void SetMaxTuningDurationMs(int max_duration_ms) override;
+  int GetMaxTuningDurationMs() const override;
+
   TuningResultsManager& GetTuningResultsManager() override;
   const TuningResultsManager& GetTuningResultsManager() const override;
 
diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
index 2901c1a83d..91e3aaaa42 100644
--- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
+++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
@@ -23,6 +23,7 @@ constexpr const char* kGpuExternalEmptyCache = "gpu_external_empty_cache";
 constexpr const char* kMiopenConvUseMaxWorkspace = "miopen_conv_use_max_workspace";
 constexpr const char* kTunableOpEnable = "tunable_op_enable";
 constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable";
+constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms";
 }  // namespace provider_option_names
 }  // namespace rocm
 
@@ -95,6 +96,12 @@ ROCMExecutionProviderInfo ROCMExecutionProviderInfo::FromProviderOptions(const P
                 ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.tunable_op.tuning_enable));
                 return Status::OK();
               })
+          .AddValueParser(
+              rocm::provider_option_names::kTunableOpMaxTuningDurationMs,
+              [&info](const std::string& value_str) -> Status {
+                ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.tunable_op.max_tuning_duration_ms));
+                return Status::OK();
+              })
           .Parse(options));
 
   ROCMExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache};
@@ -116,6 +123,7 @@ ProviderOptions ROCMExecutionProviderInfo::ToProviderOptions(const ROCMExecution
       {rocm::provider_option_names::kMiopenConvUseMaxWorkspace, MakeStringWithClassicLocale(info.miopen_conv_use_max_workspace)},
       {rocm::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op.enable)},
       {rocm::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)},
+      {rocm::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)},
   };
 
   return options;
@@ -130,6 +138,7 @@ ProviderOptions ROCMExecutionProviderInfo::ToProviderOptions(const OrtROCMProvid
       {rocm::provider_option_names::kDoCopyInDefaultStream, MakeStringWithClassicLocale(info.do_copy_in_default_stream)},
       {rocm::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)},
       {rocm::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)},
+      {rocm::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)},
   };
 
   return options;
diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h
index 5b1fd09589..e35c0cc0af 100644
--- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h
+++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h
@@ -40,6 +40,7 @@ namespace rocm {
 struct TunableOpInfo {
   bool enable{false};
   bool tuning_enable{false};
+  int max_tuning_duration_ms{};
 };
 }  // namespace rocm
 
@@ -76,6 +77,7 @@ struct std::hash<::onnxruntime::rocm::TunableOpInfo> {
     size_t seed_and_value{0xbc9f1d34};
     onnxruntime::HashCombine(info.enable, seed_and_value);
     onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
+    onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
     return seed_and_value;
   }
 };
diff --git a/onnxruntime/core/providers/rocm/rocm_provider_factory.cc b/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
index e1a07a3486..f452c1ae82 100644
--- a/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
+++ b/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
@@ -175,6 +175,7 @@ struct ROCM_Provider : Provider {
     info.default_memory_arena_cfg = params->default_memory_arena_cfg;
     info.tunable_op.enable = params->tunable_op_enable;
     info.tunable_op.tuning_enable = params->tunable_op_tuning_enable;
+    info.tunable_op.max_tuning_duration_ms = params->tunable_op_max_tuning_duration_ms;
 
     return std::make_shared<ROCMProviderFactory>(info);
   }
@@ -193,6 +194,7 @@ struct ROCM_Provider : Provider {
     rocm_options.default_memory_arena_cfg = info.default_memory_arena_cfg;
     rocm_options.tunable_op_enable = info.tunable_op.enable;
     rocm_options.tunable_op_tuning_enable = info.tunable_op.tuning_enable;
+    rocm_options.tunable_op_max_tuning_duration_ms = info.tunable_op.max_tuning_duration_ms;
   }
 
   ProviderOptions GetProviderOptions(const void* provider_options) override {
diff --git a/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.cc b/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.cc
index 1cc45ce840..4a1b37387b 100644
--- a/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.cc
+++ b/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.cc
@@ -114,6 +114,14 @@ bool RocmTuningContext::IsTuningEnabled() const {
   return info_->tuning_enable;
 }
 
+void RocmTuningContext::SetMaxTuningDurationMs(int max_duration_ms) {
+  info_->max_tuning_duration_ms = max_duration_ms;
+}
+
+int RocmTuningContext::GetMaxTuningDurationMs() const {
+  return info_->max_tuning_duration_ms > 0 ? info_->max_tuning_duration_ms : std::numeric_limits<int>::max();
+}
+
 TuningResultsManager& RocmTuningContext::GetTuningResultsManager() {
   return manager_;
 }
diff --git a/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.h b/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.h
index d2ddb37dd0..f0feb1cb74 100644
--- a/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.h
+++ b/onnxruntime/core/providers/rocm/tunable/rocm_tuning_context.h
@@ -41,6 +41,9 @@ class RocmTuningContext : public ITuningContext {
   void DisableTuning() override;
   bool IsTuningEnabled() const override;
 
+  void SetMaxTuningDurationMs(int max_duration_ms) override;
+  int GetMaxTuningDurationMs() const override;
+
   TuningResultsManager& GetTuningResultsManager() override;
   const TuningResultsManager& GetTuningResultsManager() const override;
 
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 68891489d1..f3a424f085 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -2086,6 +2086,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateROCMProviderOptions, _Outptr_ OrtROCMProvider
   options->default_memory_arena_cfg = nullptr;
   options->tunable_op_enable = 0;
   options->tunable_op_tuning_enable = 0;
+  options->tunable_op_max_tuning_duration_ms = 0;
 
   *out = options.release();
   return nullptr;
diff --git a/onnxruntime/test/framework/tunable_op_test.cc b/onnxruntime/test/framework/tunable_op_test.cc
index 6793b1c49c..4565a99e87 100644
--- a/onnxruntime/test/framework/tunable_op_test.cc
+++ b/onnxruntime/test/framework/tunable_op_test.cc
@@ -57,6 +57,11 @@ class TestTuningContext : public ITuningContext {
   void DisableTuning() override { tuning_enabled_ = false; }
   bool IsTuningEnabled() const override { return tuning_enabled_; }
 
+  void SetMaxTuningDurationMs(int max_duration_ms) override { max_tuning_duration_ms_ = max_duration_ms; }
+  int GetMaxTuningDurationMs() const override {
+    return max_tuning_duration_ms_ > 0 ? max_tuning_duration_ms_ : std::numeric_limits<int>::max();
+  }
+
   TuningResultsManager& GetTuningResultsManager() override { return manager_; }
   const TuningResultsManager& GetTuningResultsManager() const override { return manager_; }
 
@@ -67,6 +72,7 @@ class TestTuningContext : public ITuningContext {
  private:
   bool op_enabled_{false};
   bool tuning_enabled_{false};
+  int max_tuning_duration_ms_{};
   TuningResultsManager manager_{};
   TestTuningResultsValidator validator_{};
 };
@@ -402,6 +408,13 @@ TEST(TunableOp, SelectFastIfTuning) {
   status = op(&params);
   ASSERT_TRUE(status.IsOK());
   ASSERT_EQ(last_run, "FastFull");
+
+  // Also set max_tuning_duration_ms, fast should be selected
+  params.TuningContext()->SetMaxTuningDurationMs(10);
+  status = op(&params);
+  ASSERT_TRUE(status.IsOK());
+  ASSERT_EQ(last_run, "FastFull");
+
 #endif
 }
 
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index b49547a040..3bea82b12c 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -259,6 +259,8 @@ class TestInferenceSession(unittest.TestCase):
 
                 test_get_and_set_option_with_values("tunable_op_tuning_enable", ["1", "0"])
 
+                test_get_and_set_option_with_values("tunable_op_max_tuning_duration_ms", ["-1", "1"])
+
                 option["gpu_external_alloc"] = "0"
                 option["gpu_external_free"] = "0"
                 option["gpu_external_empty_cache"] = "0"
@@ -395,6 +397,8 @@ class TestInferenceSession(unittest.TestCase):
 
                 test_get_and_set_option_with_values("tunable_op_tuning_enable", ["1", "0"])
 
+                test_get_and_set_option_with_values("tunable_op_max_tuning_duration_ms", ["-1", "1"])
+
             runRocmOptionsTest()
 
     def testInvalidSetProviders(self):  # noqa: N802
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index d5847ddd4e..da8710eea8 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -191,6 +191,7 @@ std::unique_ptr<IExecutionProvider> DefaultRocmExecutionProvider(bool test_tunab
   provider_options.do_copy_in_default_stream = true;
   provider_options.tunable_op_enable = test_tunable_op ? 1 : 0;
   provider_options.tunable_op_tuning_enable = test_tunable_op ? 1 : 0;
+  provider_options.tunable_op_max_tuning_duration_ms = 0;
   if (auto factory = RocmProviderFactoryCreator::Create(&provider_options))
     return factory->CreateProvider();
 #endif