Enabled ROCm support for graph transformations (#7057)

This commit is contained in:
raviskolli 2021-03-22 09:02:10 -07:00 committed by GitHub
parent b2c6617b0f
commit 79ba045d74
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -144,27 +144,27 @@ std::vector<std::unique_ptr<GraphTransformer>> GenerateTransformers(TransformerL
transformers.emplace_back(onnxruntime::make_unique<DynamicQuantizeMatMulFusion>(cpu_execution_providers));
std::unordered_set<std::string> cpu_acl_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kAclExecutionProvider};
std::unordered_set<std::string> cpu_cuda_acl_armnn_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider, onnxruntime::kAclExecutionProvider, onnxruntime::kArmNNExecutionProvider};
std::unordered_set<std::string> cpu_cuda_rocm_acl_armnn_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider, onnxruntime::kRocmExecutionProvider, onnxruntime::kAclExecutionProvider, onnxruntime::kArmNNExecutionProvider};
transformers.emplace_back(onnxruntime::make_unique<ConvActivationFusion>(cpu_cuda_acl_armnn_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<ConvActivationFusion>(cpu_cuda_rocm_acl_armnn_execution_providers));
const std::unordered_set<std::string> cuda_execution_providers = {onnxruntime::kCudaExecutionProvider};
const std::unordered_set<std::string> cpu_cuda_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider};
transformers.emplace_back(onnxruntime::make_unique<GeluFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<LayerNormFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<SimplifiedLayerNormFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<AttentionFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<EmbedLayerNormFusion>(cpu_cuda_execution_providers));
const std::unordered_set<std::string> cuda_rocm_execution_providers = {onnxruntime::kCudaExecutionProvider, onnxruntime::kRocmExecutionProvider};
const std::unordered_set<std::string> cpu_cuda_rocm_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider, onnxruntime::kRocmExecutionProvider};
transformers.emplace_back(onnxruntime::make_unique<GeluFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<LayerNormFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<SimplifiedLayerNormFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<AttentionFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<EmbedLayerNormFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasDropoutFusion>(cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<MatmulTransposeFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasGeluFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasSoftmaxFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<SkipLayerNormFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasDropoutFusion>(cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<MatmulTransposeFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasGeluFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<BiasSoftmaxFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<SkipLayerNormFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<FastGeluFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<FastGeluFusion>(cpu_cuda_rocm_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<MatMulScaleFusion>(cpu_cuda_execution_providers));
transformers.emplace_back(onnxruntime::make_unique<MatMulScaleFusion>(cpu_cuda_rocm_execution_providers));
#endif
} break;
@ -197,8 +197,8 @@ std::vector<std::unique_ptr<GraphTransformer>> GenerateTransformers(TransformerL
// These transformers could only be enabled by custom transformer list.
#ifndef DISABLE_CONTRIB_OPS
if (level == TransformerLevel::Level2) {
std::unordered_set<std::string> cuda_execution_providers = {onnxruntime::kCudaExecutionProvider};
transformers.emplace_back(onnxruntime::make_unique<GeluApproximation>(cuda_execution_providers));
std::unordered_set<std::string> cuda_rocm_execution_providers = {onnxruntime::kCudaExecutionProvider, onnxruntime::kRocmExecutionProvider};
transformers.emplace_back(onnxruntime::make_unique<GeluApproximation>(cuda_rocm_execution_providers));
}
#endif