From a3cd36dbfbc9b3f33e5abef852752ac4e5472198 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Thu, 15 Dec 2022 09:09:07 -0800 Subject: [PATCH] change default cudnn_conv_use_max_workspace =1 (#13981) ### Description Change the default value of cudnn_conv_use_max_workspace to be consistent with ORT Training. Test results with Stable Diffusion 1.4: Latency (Seconds per Query) | T4 | V100 | A100 -- | -- | -- | -- ORT FP32 (Before) | 28.4 | 10.1 | 7.2 ORT FP32 (After) | 26.2 | 8.3 | 4.9 Gain | 8% | 18% | 32% Latency (Seconds per Query) | T4 | V100 | A100 -- | -- | -- | -- ORT FP16 (Before) | 13.1 | 6.4 | 4.3 ORT FP16 (After) | 9.6 | 3.8 | 2.4 Gain | 27% | 41% | 44% We can see that there is a significant gain after changing the default value. Typical users might not know about this option, so it is better to change the default value so that users get the best performance out of the box. --- .../core/providers/cuda/cuda_execution_provider_info.h | 7 ++++--- onnxruntime/core/session/provider_bridge_ort.cc | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h index cd4d27f78f..534d3354ec 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h @@ -56,9 +56,10 @@ struct CUDAExecutionProviderInfo { // arena config. OrtArenaCfg* default_memory_arena_cfg{nullptr}; CUDAExecutionProviderExternalAllocatorInfo external_allocator_info{}; - // By default use fix workspace size (32M) for Conv algo search, the final algo might not be the best. - // If set to true, try to use as much as possible memory for algo search. - bool cudnn_conv_use_max_workspace{false}; + + // By default, try to use as much as possible memory for algo search. + // If set to false, use fix workspace size (32M) for Conv algo search, the final algo might not be the best. 
+ bool cudnn_conv_use_max_workspace{true}; bool enable_cuda_graph{false}; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 1198a7a3ef..319cb5f7b6 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1182,7 +1182,7 @@ OrtCUDAProviderOptionsV2 OrtCUDAProviderOptionsToOrtCUDAProviderOptionsV2(const cuda_options_converted.user_compute_stream = legacy_cuda_options->user_compute_stream; cuda_options_converted.default_memory_arena_cfg = legacy_cuda_options->default_memory_arena_cfg; // Use default value as this field is not available in OrtCUDAProviderOptions - cuda_options_converted.cudnn_conv_use_max_workspace = 0; + cuda_options_converted.cudnn_conv_use_max_workspace = 1; cuda_options_converted.enable_cuda_graph = 0; cuda_options_converted.cudnn_conv1d_pad_to_nc1d = 0; @@ -1668,7 +1668,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateCUDAProviderOptions, _Outptr_ OrtCUDAProvider (*out)->has_user_compute_stream = 0; (*out)->user_compute_stream = nullptr; (*out)->default_memory_arena_cfg = nullptr; - (*out)->cudnn_conv_use_max_workspace = 0; + (*out)->cudnn_conv_use_max_workspace = 1; (*out)->enable_cuda_graph = 0; (*out)->cudnn_conv1d_pad_to_nc1d = 0; return nullptr;