change default cudnn_conv_use_max_workspace =1 (#13981)

### Description

Change the default value of cudnn_conv_use_max_workspace to 1, to be consistent with ORT Training.

Test results with Stable Diffusion 1.4:

Latency (Seconds per Query) | T4 | V100 | A100
-- | -- | -- | --
ORT FP32 (Before) | 28.4 | 10.1 | 7.2
ORT FP32 (After) | 26.2 | 8.3 | 4.9
Gain | 8% | 18% | 32%

Latency (Seconds per Query) | T4 | V100 | A100
-- | -- | -- | --
ORT FP16 (Before) | 13.1 | 6.4 | 4.3
ORT FP16 (After) | 9.6 | 3.8 | 2.4
Gain | 27% | 41% | 44%

There is a significant gain after changing the default value. Most users are unlikely to know about this option, so it is better to change the default value so that users get the best performance out of the box.
2026-06-06 00:03:22 +00:00 · 2022-12-15 09:09:07 -08:00 · 2022-12-15 09:09:07 -08:00 · a3cd36dbfb
commit a3cd36dbfb
parent a81faee41e
2 changed files with 6 additions and 5 deletions
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@ -56,9 +56,10 @@ struct CUDAExecutionProviderInfo {
  // arena config.
  OrtArenaCfg* default_memory_arena_cfg{nullptr};
  CUDAExecutionProviderExternalAllocatorInfo external_allocator_info{};
-  // By default use fix workspace size (32M) for Conv algo search, the final algo might not be the best.
-  // If set to true, try to use as much as possible memory for algo search.
-  bool cudnn_conv_use_max_workspace{false};
+
+  // By default, try to use as much memory as possible for Conv algo search.
+  // If set to false, use a fixed workspace size (32M) for Conv algo search; the final algo might not be the best.
+  bool cudnn_conv_use_max_workspace{true};

  bool enable_cuda_graph{false};

--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@ -1182,7 +1182,7 @@ OrtCUDAProviderOptionsV2 OrtCUDAProviderOptionsToOrtCUDAProviderOptionsV2(const
  cuda_options_converted.user_compute_stream = legacy_cuda_options->user_compute_stream;
  cuda_options_converted.default_memory_arena_cfg = legacy_cuda_options->default_memory_arena_cfg;
  // Use default value as this field is not available in OrtCUDAProviderOptions
-  cuda_options_converted.cudnn_conv_use_max_workspace = 0;
+  cuda_options_converted.cudnn_conv_use_max_workspace = 1;
  cuda_options_converted.enable_cuda_graph = 0;
  cuda_options_converted.cudnn_conv1d_pad_to_nc1d = 0;

@ -1668,7 +1668,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateCUDAProviderOptions, _Outptr_ OrtCUDAProvider
  (*out)->has_user_compute_stream = 0;
  (*out)->user_compute_stream = nullptr;
  (*out)->default_memory_arena_cfg = nullptr;
-  (*out)->cudnn_conv_use_max_workspace = 0;
+  (*out)->cudnn_conv_use_max_workspace = 1;
  (*out)->enable_cuda_graph = 0;
  (*out)->cudnn_conv1d_pad_to_nc1d = 0;
  return nullptr;