Change the default of cudnn_conv_use_max_workspace to 1 (#13981)

### Description
Change the default value of cudnn_conv_use_max_workspace to 1, to be consistent with ORT Training.
Test results with Stable Diffusion 1.4:

Latency (Seconds per Query) | T4 | V100 | A100
-- | -- | -- | --
ORT FP32 (Before) | 28.4 | 10.1 | 7.2
ORT FP32 (After) | 26.2 | 8.3 | 4.9
Gain | 8% | 18% | 32%

Latency (Seconds per Query) | T4 | V100 | A100
-- | -- | -- | --
ORT FP16 (Before) | 13.1 | 6.4 | 4.3
ORT FP16 (After) | 9.6 | 3.8 | 2.4
Gain | 27% | 41% | 44%

These results show a significant gain after changing the default value. Most users are unlikely to know about this option, so it is better to change the default so that users get the best performance out of the box.
This commit is contained in:
Tianlei Wu 2022-12-15 09:09:07 -08:00 committed by GitHub
parent a81faee41e
commit a3cd36dbfb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 5 deletions

View file

@ -56,9 +56,10 @@ struct CUDAExecutionProviderInfo {
// arena config.
OrtArenaCfg* default_memory_arena_cfg{nullptr};
CUDAExecutionProviderExternalAllocatorInfo external_allocator_info{};
// By default use fix workspace size (32M) for Conv algo search, the final algo might not be the best.
// If set to true, try to use as much as possible memory for algo search.
bool cudnn_conv_use_max_workspace{false};
// By default, try to use as much as possible memory for algo search.
// If set to false, use fix workspace size (32M) for Conv algo search, the final algo might not be the best.
bool cudnn_conv_use_max_workspace{true};
bool enable_cuda_graph{false};

View file

@ -1182,7 +1182,7 @@ OrtCUDAProviderOptionsV2 OrtCUDAProviderOptionsToOrtCUDAProviderOptionsV2(const
cuda_options_converted.user_compute_stream = legacy_cuda_options->user_compute_stream;
cuda_options_converted.default_memory_arena_cfg = legacy_cuda_options->default_memory_arena_cfg;
// Use default value as this field is not available in OrtCUDAProviderOptions
cuda_options_converted.cudnn_conv_use_max_workspace = 0;
cuda_options_converted.cudnn_conv_use_max_workspace = 1;
cuda_options_converted.enable_cuda_graph = 0;
cuda_options_converted.cudnn_conv1d_pad_to_nc1d = 0;
@ -1668,7 +1668,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateCUDAProviderOptions, _Outptr_ OrtCUDAProvider
(*out)->has_user_compute_stream = 0;
(*out)->user_compute_stream = nullptr;
(*out)->default_memory_arena_cfg = nullptr;
(*out)->cudnn_conv_use_max_workspace = 0;
(*out)->cudnn_conv_use_max_workspace = 1;
(*out)->enable_cuda_graph = 0;
(*out)->cudnn_conv1d_pad_to_nc1d = 0;
return nullptr;