mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
[CUDA][ROCm][Training] Fix cuda/rocm provider info hash (#19398)
When I test a new provider option, the training pipeline failed. I found that training uses hash code of provider info to try get provider instance. If a provider option is not used in hashing, the provider instance fetched from cache might have different configuration for that option. Here I fix the hashing to use all provider options (except the default Arena config that cannot be set from python API since training is used with PyTorch in most cases). Fixed a few obvious typo in the touched files. Add regression test cases.
This commit is contained in:
parent
06a84c8a0d
commit
23996bbbbe
5 changed files with 75 additions and 38 deletions
|
|
@ -31,8 +31,9 @@ constexpr const char* kTunableOpEnable = "tunable_op_enable";
|
|||
constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable";
|
||||
constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms";
|
||||
constexpr const char* kEnableSkipLayerNormStrictMode = "enable_skip_layer_norm_strict_mode";
|
||||
constexpr const char* kPreferNCHWMode = "prefer_nhwc";
|
||||
constexpr const char* KUseEPLevelUnifiedStream = "use_ep_level_unified_stream";
|
||||
constexpr const char* kPreferNHWCMode = "prefer_nhwc";
|
||||
constexpr const char* kUseEPLevelUnifiedStream = "use_ep_level_unified_stream";
|
||||
|
||||
} // namespace provider_option_names
|
||||
} // namespace cuda
|
||||
|
||||
|
|
@ -112,8 +113,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
|
|||
.AddAssignmentToReference(cuda::provider_option_names::kEnableCudaGraph, info.enable_cuda_graph)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kCudnnConv1dPadToNc1d, info.cudnn_conv1d_pad_to_nc1d)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kEnableSkipLayerNormStrictMode, info.enable_skip_layer_norm_strict_mode)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kPreferNCHWMode, info.prefer_nhwc)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::KUseEPLevelUnifiedStream, info.use_ep_level_unified_stream)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kPreferNHWCMode, info.prefer_nhwc)
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kUseEPLevelUnifiedStream, info.use_ep_level_unified_stream)
|
||||
.AddValueParser(
|
||||
cuda::provider_option_names::kTunableOpEnable,
|
||||
[&info](const std::string& value_str) -> Status {
|
||||
|
|
@ -164,8 +165,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const CUDAExecution
|
|||
{cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)},
|
||||
{cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)},
|
||||
{cuda::provider_option_names::kEnableSkipLayerNormStrictMode, MakeStringWithClassicLocale(info.enable_skip_layer_norm_strict_mode)},
|
||||
{cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
|
||||
{cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
|
||||
{cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
|
||||
{cuda::provider_option_names::kUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
|
||||
};
|
||||
|
||||
return options;
|
||||
|
|
@ -185,8 +186,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const OrtCUDAProvid
|
|||
{cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)},
|
||||
{cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)},
|
||||
{cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)},
|
||||
{cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
|
||||
{cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
|
||||
{cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
|
||||
{cuda::provider_option_names::kUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
|
||||
};
|
||||
|
||||
return options;
|
||||
|
|
|
|||
|
|
@ -83,12 +83,36 @@ struct CUDAExecutionProviderInfo {
|
|||
} // namespace onnxruntime
|
||||
|
||||
template <>
|
||||
struct std::hash<::onnxruntime::cuda::TunableOpInfo> {
|
||||
size_t operator()(const ::onnxruntime::cuda::TunableOpInfo& info) const {
|
||||
size_t seed_and_value{0xbc9f1d34};
|
||||
onnxruntime::HashCombine(info.enable, seed_and_value);
|
||||
onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
|
||||
onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
|
||||
return seed_and_value;
|
||||
struct std::hash<::onnxruntime::CUDAExecutionProviderInfo> {
|
||||
size_t operator()(const ::onnxruntime::CUDAExecutionProviderInfo& info) const {
|
||||
size_t value{0xbc9f1d34}; // seed
|
||||
|
||||
// Bits: device_id (16), arena_extend_strategy/cudnn_conv_algo_search (reserved 2), boolean options (1 each)
|
||||
size_t data = static_cast<size_t>(info.device_id) ^
|
||||
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
|
||||
(static_cast<size_t>(info.cudnn_conv_algo_search) << 18) ^
|
||||
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
|
||||
(static_cast<size_t>(info.has_user_compute_stream) << 21) ^
|
||||
(static_cast<size_t>(info.cudnn_conv_use_max_workspace) << 22) ^
|
||||
(static_cast<size_t>(info.enable_cuda_graph) << 23) ^
|
||||
(static_cast<size_t>(info.tunable_op.enable) << 24) ^
|
||||
(static_cast<size_t>(info.tunable_op.tuning_enable) << 25) ^
|
||||
(static_cast<size_t>(info.cudnn_conv1d_pad_to_nc1d) << 26) ^
|
||||
(static_cast<size_t>(info.enable_skip_layer_norm_strict_mode) << 27) ^
|
||||
(static_cast<size_t>(info.prefer_nhwc) << 28) ^
|
||||
(static_cast<size_t>(info.use_ep_level_unified_stream) << 29);
|
||||
onnxruntime::HashCombine(data, value);
|
||||
|
||||
onnxruntime::HashCombine(info.gpu_mem_limit, value);
|
||||
onnxruntime::HashCombine(info.tunable_op.max_tuning_duration_ms, value);
|
||||
|
||||
// Memory pointers
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value);
|
||||
|
||||
// The default memory arena cfg is not used in hashing right now.
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -74,12 +74,32 @@ struct ROCMExecutionProviderInfo {
|
|||
} // namespace onnxruntime
|
||||
|
||||
template <>
|
||||
struct std::hash<::onnxruntime::rocm::TunableOpInfo> {
|
||||
size_t operator()(const ::onnxruntime::rocm::TunableOpInfo& info) const {
|
||||
size_t seed_and_value{0xbc9f1d34};
|
||||
onnxruntime::HashCombine(info.enable, seed_and_value);
|
||||
onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
|
||||
onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
|
||||
return seed_and_value;
|
||||
struct std::hash<::onnxruntime::ROCMExecutionProviderInfo> {
|
||||
size_t operator()(const ::onnxruntime::ROCMExecutionProviderInfo& info) const {
|
||||
size_t value{0xbc9f1d34}; // seed
|
||||
|
||||
// Bits: device_id (16), arena_extend_strategy/miopen_conv_exhaustive_search (reserved 2), boolean options (1 each)
|
||||
size_t data = static_cast<size_t>(info.device_id) ^
|
||||
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
|
||||
(static_cast<size_t>(info.miopen_conv_exhaustive_search) << 18) ^
|
||||
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
|
||||
(static_cast<size_t>(info.has_user_compute_stream) << 21) ^
|
||||
(static_cast<size_t>(info.miopen_conv_use_max_workspace) << 22) ^
|
||||
(static_cast<size_t>(info.enable_hip_graph) << 23) ^
|
||||
(static_cast<size_t>(info.tunable_op.enable) << 24) ^
|
||||
(static_cast<size_t>(info.tunable_op.tuning_enable) << 25);
|
||||
onnxruntime::HashCombine(data, value);
|
||||
|
||||
onnxruntime::HashCombine(info.gpu_mem_limit, value);
|
||||
onnxruntime::HashCombine(info.tunable_op.max_tuning_duration_ms, value);
|
||||
|
||||
// Memory pointers
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value);
|
||||
onnxruntime::HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value);
|
||||
|
||||
// The default memory arena cfg is not used in hashing right now.
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -414,6 +414,8 @@ class TestInferenceSession(unittest.TestCase):
|
|||
str(option_value),
|
||||
)
|
||||
|
||||
test_get_and_set_option_with_values("enable_cuda_graph", ["1", "0"])
|
||||
|
||||
test_get_and_set_option_with_values("arena_extend_strategy", ["kNextPowerOfTwo", "kSameAsRequested"])
|
||||
|
||||
test_get_and_set_option_with_values("cudnn_conv_algo_search", ["DEFAULT", "EXHAUSTIVE", "HEURISTIC"])
|
||||
|
|
@ -553,6 +555,8 @@ class TestInferenceSession(unittest.TestCase):
|
|||
|
||||
test_get_and_set_option_with_values("tunable_op_max_tuning_duration_ms", ["-1", "1"])
|
||||
|
||||
test_get_and_set_option_with_values("enable_hip_graph", ["1", "0"])
|
||||
|
||||
run_rocm_options_test()
|
||||
|
||||
def test_invalid_set_providers(self):
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ void addObjectMethodsForLazyTensor(py::module& m);
|
|||
#endif
|
||||
bool InitArray();
|
||||
|
||||
bool GetDyanmicExecutionProviderHash(
|
||||
bool GetDynamicExecutionProviderHash(
|
||||
const std::string& ep_shared_lib_path,
|
||||
const ProviderOptions& provider_options,
|
||||
size_t& hash,
|
||||
|
|
@ -87,13 +87,7 @@ bool GetProviderInstanceHash(const std::string& type,
|
|||
if (auto* cuda_provider_info = TryGetProviderInfo_CUDA()) {
|
||||
const CUDAExecutionProviderInfo info = GetCudaExecutionProviderInfo(cuda_provider_info,
|
||||
provider_options_map);
|
||||
hash = static_cast<size_t>(info.device_id) ^
|
||||
info.gpu_mem_limit ^
|
||||
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
|
||||
(static_cast<size_t>(info.cudnn_conv_algo_search) << 18) ^
|
||||
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
|
||||
(static_cast<size_t>(info.has_user_compute_stream) << 22) ^
|
||||
std::hash<cuda::TunableOpInfo>{}(info.tunable_op);
|
||||
hash = std::hash<CUDAExecutionProviderInfo>{}(info);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -102,13 +96,7 @@ bool GetProviderInstanceHash(const std::string& type,
|
|||
if (auto* rocm_provider_info = TryGetProviderInfo_ROCM()) {
|
||||
const ROCMExecutionProviderInfo info = GetRocmExecutionProviderInfo(rocm_provider_info,
|
||||
provider_options_map);
|
||||
hash = static_cast<size_t>(info.device_id) ^
|
||||
info.gpu_mem_limit ^
|
||||
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
|
||||
(static_cast<size_t>(info.miopen_conv_exhaustive_search) << 18) ^
|
||||
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
|
||||
(static_cast<size_t>(info.has_user_compute_stream) << 22) ^
|
||||
std::hash<rocm::TunableOpInfo>{}(info.tunable_op);
|
||||
hash = std::hash<ROCMExecutionProviderInfo>{}(info);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -128,7 +116,7 @@ bool GetProviderInstanceHash(const std::string& type,
|
|||
provider_options.insert(option);
|
||||
}
|
||||
}
|
||||
return GetDyanmicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash);
|
||||
return GetDynamicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue