diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc
index 644bbe4d2d..00f5e0bb54 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.cc
+++ b/onnxruntime/core/providers/cuda/nn/conv.cc
@@ -33,53 +33,6 @@ REGISTER_KERNEL_TYPED(float)
 REGISTER_KERNEL_TYPED(double)
 REGISTER_KERNEL_TYPED(MLFloat16)
 
-template <typename T>
-const cudnnConvolutionFwdAlgo_t Conv<T>::kAllAlgos[] = {
-    CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
-    CUDNN_CONVOLUTION_FWD_ALGO_FFT,
-    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING,
-    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM,
-    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
-    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT,
-    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD,
-    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED,
-};
-
-cudnnStatus_t getWorkspaceSize(const CudnnConvState<cudnnConvolutionFwdAlgoPerf_t>& s,
-                               cudnnConvolutionFwdAlgo_t algo, size_t* sz) {
-  return cudnnGetConvolutionForwardWorkspaceSize(
-      s.handle,
-      s.x_tensor,
-      s.w_desc,
-      s.conv_desc,
-      s.y_tensor,
-      algo,
-      sz);
-}
-
-template <typename algo_t>
-size_t getMaxWorkspaceSize(const CudnnConvState<cudnnConvolutionFwdAlgoPerf_t>& s,
-                           const algo_t* algo, int n_algo) {
-  size_t max_ws_size = 0;
-
-  // TODO: get maximum available size from memory areana
-
-  size_t free, total;
-  CUDA_CALL_THROW(cudaMemGetInfo(&free, &total));
-  // Assuming 10% of fragmentation
-  free = static_cast<size_t>(static_cast<double>(free) * 0.9);
-
-  for (int i = 0; i < n_algo; i++) {
-    cudnnStatus_t err;
-    size_t sz;
-    err = getWorkspaceSize(s, algo[i], &sz);
-    if (CUDNN_STATUS_SUCCESS != err || sz == 0 || sz < max_ws_size || sz > free)
-      continue;
-    max_ws_size = sz;
-  }
-  return max_ws_size;
-}
-
 Status SliceOutUnwantedOutputSection(cudaStream_t stream,
                                      const void* input_data, const std::vector<int64_t>& input_dims,
                                      void* output_data,
@@ -260,12 +213,9 @@ Status Conv<T>::UpdateState(OpKernelContext* context, bool bias_expected) const
       ORT_ENFORCE(cudnn_conv_algo > -1 && cudnn_conv_algo < 3, "cudnn_conv_algo should be 0, 1 or 2, but got ", cudnn_conv_algo);
       switch (cudnn_conv_algo) {
         case 0: {
-          static constexpr int num_algos = CUDNN_CONVOLUTION_FWD_ALGO_COUNT;
-          size_t max_ws_size = getMaxWorkspaceSize(s_, kAllAlgos, num_algos);
-          IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(max_ws_size);
-
+          IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize);
           CUDNN_RETURN_IF_ERROR(cudnnFindConvolutionForwardAlgorithmEx(
-              s_.handle,
+              CudnnHandle(),
               s_.x_tensor,
               s_.x_data,
               s_.w_desc,
@@ -277,12 +227,12 @@ Status Conv<T>::UpdateState(OpKernelContext* context, bool bias_expected) const
               &algo_count,  // returnedAlgoCount
               &perf,
               algo_search_workspace.get(),
-              max_ws_size));
+              AlgoSearchWorkspaceSize));
           break;
         }
         case 1:
           CUDNN_RETURN_IF_ERROR(cudnnGetConvolutionForwardAlgorithm_v7(
-              s_.handle,
+              CudnnHandle(),
               s_.x_tensor,
               s_.w_desc,
               s_.conv_desc,
@@ -294,7 +244,14 @@ Status Conv<T>::UpdateState(OpKernelContext* context, bool bias_expected) const
 
         default:
           perf.algo = kDefaultConvAlgo;
-          CUDNN_RETURN_IF_ERROR(getWorkspaceSize(s_, perf.algo, &perf.memory));
+          CUDNN_RETURN_IF_ERROR(cudnnGetConvolutionForwardWorkspaceSize(
+              CudnnHandle(),
+              s_.x_tensor,
+              s_.w_desc,
+              s_.conv_desc,
+              s_.y_tensor,
+              perf.algo,
+              &perf.memory));
           if (std::is_same<T, MLFloat16>::value) {
             perf.mathType = CUDNN_TENSOR_OP_MATH;
           } else {
@@ -333,7 +290,7 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
   const auto alpha = Consts<CudaT>::One;
   const auto beta = Consts<CudaT>::Zero;
   IAllocatorUniquePtr<void> workspace = GetWorkSpace();
-  CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(s_.handle,
+  CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(CudnnHandle(),
                                                 &alpha,
                                                 s_.x_tensor,
                                                 s_.x_data,
@@ -347,7 +304,7 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
                                                 s_.y_tensor,
                                                 s_.y_data));
   if (nullptr != s_.b_data) {
-    CUDNN_RETURN_IF_ERROR(cudnnAddTensor(s_.handle, &alpha, s_.b_tensor, s_.b_data,
+    CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha, s_.b_tensor, s_.b_data,
                                          &alpha, s_.y_tensor, s_.y_data));
   }
   // To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions
diff --git a/onnxruntime/core/providers/cuda/nn/conv.h b/onnxruntime/core/providers/cuda/nn/conv.h
index ab6647941b..5abaa1f595 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.h
+++ b/onnxruntime/core/providers/cuda/nn/conv.h
@@ -111,8 +111,6 @@ constexpr size_t MAX_CACHED_ALGO_PERF_RESULTS = 10000;
 
 template <typename AlgoPerfType>
 struct CudnnConvState {
-  cudnnHandle_t handle;
-
   // if x/w dims changed, update algo and cudnnTensors
   std::vector<int64_t> last_x_dims;
   std::vector<int64_t> last_w_dims;
@@ -175,8 +173,6 @@ class Conv : public CudaKernel {
   Conv(const OpKernelInfo& info) : CudaKernel(info), conv_attrs_(info) {
     auto pads_size = conv_attrs_.pads.size();
     ORT_ENFORCE(pads_size % 2 == 0);
-
-    s_.handle = CudnnHandle();
   }
 
   Status ComputeInternal(OpKernelContext* context) const override;
@@ -190,7 +186,6 @@ class Conv : public CudaKernel {
   ConvAttributes conv_attrs_;
   mutable CudnnConvState<cudnnConvolutionFwdAlgoPerf_t> s_;
   constexpr static auto kDefaultConvAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
-  static const cudnnConvolutionFwdAlgo_t kAllAlgos[];
 };
 
 Status SliceOutUnwantedOutputSection(cudaStream_t stream,