From f22f04a109b4e0ffa592f50a49f0f2b4569e4cfa Mon Sep 17 00:00:00 2001 From: jingyanwangms <47403504+jingyanwangms@users.noreply.github.com> Date: Tue, 2 Mar 2021 18:54:25 -0800 Subject: [PATCH] Add comment (#6860) Co-authored-by: Jingyan Wang --- .../contrib_ops/cpu/cpu_contrib_kernels.cc | 4 ++-- onnxruntime/contrib_ops/cpu/fused_matmul.cc | 1 + .../contrib_ops/cuda/cuda_contrib_kernels.cc | 17 +++++++++-------- .../contrib_ops/cuda/math/fused_matmul.cc | 1 + .../contrib_ops/rocm/rocm_contrib_kernels.cc | 6 ------ .../core/optimizer/matmul_scale_fusion.cc | 5 ++--- .../core/optimizer/matmul_transpose_fusion.cc | 5 ++--- onnxruntime/core/providers/cpu/math/matmul.h | 2 +- .../test/contrib_ops/fused_matmul_op_test.cc | 14 -------------- .../operator_type_usage_processors.py | 2 +- 10 files changed, 19 insertions(+), 38 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index abe425a09f..534d9c3243 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -20,7 +20,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Range); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, WordConvEmbedding); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherND); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul); // backward compatibility class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FusedMatMul); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MurmurHash3); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MaxpoolWithMask); @@ -178,7 +178,7 @@ Status 
RegisterCpuContribKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, // backward compatibility BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/contrib_ops/cpu/fused_matmul.cc b/onnxruntime/contrib_ops/cpu/fused_matmul.cc index 8be8d4fbc5..e1d9c15947 100644 --- a/onnxruntime/contrib_ops/cpu/fused_matmul.cc +++ b/onnxruntime/contrib_ops/cpu/fused_matmul.cc @@ -6,6 +6,7 @@ namespace onnxruntime { namespace contrib { +// TransposeMatMul is kept for backward compatibility ONNX_OPERATOR_KERNEL_EX( TransposeMatMul, kMSDomain, diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc index a2cb327943..1f88aac942 100644 --- a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc @@ -17,9 +17,9 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, BiasGelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, BiasGelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, BiasGelu); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, TransposeMatMul); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, TransposeMatMul); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, TransposeMatMul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, TransposeMatMul); // backward compatibility +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, TransposeMatMul); // backward compatibility +class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, TransposeMatMul); // backward compatibility class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FusedMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, FusedMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, FusedMatMul); @@ -83,7 +83,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, FastGelu); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, TransposeMatMul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, TransposeMatMul); // backward compatibility class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, FusedMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, BFloat16_float, LayerNormalization); #endif @@ -105,9 +105,9 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, // backward compatibility + BuildKernelCreateInfo, // backward compatibility + BuildKernelCreateInfo, // backward compatibility BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -171,7 +171,8 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) { #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 BuildKernelCreateInfo, - BuildKernelCreateInfo, + // TransposeMatMul is still here for backward compatibility + BuildKernelCreateInfo, // backward compatibility BuildKernelCreateInfo, BuildKernelCreateInfo, #endif diff 
--git a/onnxruntime/contrib_ops/cuda/math/fused_matmul.cc b/onnxruntime/contrib_ops/cuda/math/fused_matmul.cc index a39a6ee3b5..d9edac84eb 100644 --- a/onnxruntime/contrib_ops/cuda/math/fused_matmul.cc +++ b/onnxruntime/contrib_ops/cuda/math/fused_matmul.cc @@ -18,6 +18,7 @@ namespace cuda { .TypeConstraint("T", DataTypeImpl::GetTensorType()), \ onnxruntime::cuda::MatMul); +// TransposeMatMul is kept here for backward compatibility REGISTER_KERNEL_TYPED(TransposeMatMul, float) REGISTER_KERNEL_TYPED(TransposeMatMul, double) REGISTER_KERNEL_TYPED(TransposeMatMul, MLFloat16) diff --git a/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc b/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc index 300308d4dc..71fa8b3278 100644 --- a/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc @@ -17,9 +17,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, BiasGelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, BiasGelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, BiasGelu); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, TransposeMatMul); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, TransposeMatMul); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, TransposeMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, FusedMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, FusedMatMul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, FusedMatMul); @@ -92,9 +89,6 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) { 
BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/optimizer/matmul_scale_fusion.cc b/onnxruntime/core/optimizer/matmul_scale_fusion.cc index 07f695659a..697bec53cb 100644 --- a/onnxruntime/core/optimizer/matmul_scale_fusion.cc +++ b/onnxruntime/core/optimizer/matmul_scale_fusion.cc @@ -187,8 +187,7 @@ Status ProcessNode( } if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "MatMul", {9, 13}) && - !graph_utils::IsSupportedOptypeVersionAndDomain(node, "FusedMatMul", {1}, kMSDomain) && - !graph_utils::IsSupportedOptypeVersionAndDomain(node, "TransposeMatMul", {1}, kMSDomain)) { + !graph_utils::IsSupportedOptypeVersionAndDomain(node, "FusedMatMul", {1}, kMSDomain)) { return Status::OK(); } @@ -206,7 +205,7 @@ Status ProcessNode( } NodeAttributes fused_node_attrs = - (node.OpType() == "TransposeMatMul") || (node.OpType() == "FusedMatMul") ? node.GetAttributes() : NodeAttributes{}; + node.OpType() == "FusedMatMul" ? 
node.GetAttributes() : NodeAttributes{}; { ONNX_NAMESPACE::AttributeProto& alpha_attr = fused_node_attrs["alpha"]; diff --git a/onnxruntime/core/optimizer/matmul_transpose_fusion.cc b/onnxruntime/core/optimizer/matmul_transpose_fusion.cc index 31672b1f12..acd43b1df3 100644 --- a/onnxruntime/core/optimizer/matmul_transpose_fusion.cc +++ b/onnxruntime/core/optimizer/matmul_transpose_fusion.cc @@ -98,8 +98,7 @@ Status MatmulTransposeFusion::ApplyImpl(Graph& graph, bool& modified, int graph_ ORT_RETURN_IF_ERROR(Recurse(node, modified, graph_level, logger)); if ((!graph_utils::IsSupportedOptypeVersionAndDomain(node, "MatMul", {9, 13}) && - !graph_utils::IsSupportedOptypeVersionAndDomain(node, "FusedMatMul", {1}, kMSDomain) && - !graph_utils::IsSupportedOptypeVersionAndDomain(node, "TransposeMatMul", {1}, kMSDomain)) || + !graph_utils::IsSupportedOptypeVersionAndDomain(node, "FusedMatMul", {1}, kMSDomain)) || !graph_utils::IsSupportedProvider(node, GetCompatibleExecutionProviders())) { continue; } @@ -139,7 +138,7 @@ Status MatmulTransposeFusion::ApplyImpl(Graph& graph, bool& modified, int graph_ bool transpose_left = (left != nullptr); bool transpose_right = (right != nullptr); float alpha = 1.0f; - if ((node.OpType() == "TransposeMatMul") || (node.OpType() == "FusedMatMul")) { + if (node.OpType() == "FusedMatMul") { transpose_left ^= static_cast(node.GetAttributes().at("transA").i()); transpose_right ^= static_cast(node.GetAttributes().at("transB").i()); alpha = node.GetAttributes().at("alpha").f(); diff --git a/onnxruntime/core/providers/cpu/math/matmul.h b/onnxruntime/core/providers/cpu/math/matmul.h index 9f75db3905..2bf12c666c 100644 --- a/onnxruntime/core/providers/cpu/math/matmul.h +++ b/onnxruntime/core/providers/cpu/math/matmul.h @@ -32,7 +32,7 @@ class MatMul final : public OpKernel { TensorShape b_shape_; BufferUniquePtr packed_b_; - // For FusedMatMul and TransposeMatMul contrib ops + // For FusedMatMul contrib ops float alpha_attr_; int64_t trans_a_attr_; 
int64_t trans_b_attr_; diff --git a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc index a571fad9c2..761104cfa1 100644 --- a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc +++ b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc @@ -170,9 +170,6 @@ TEST(FusedMatMulOpTest, FloatTypeNoTranspose) { } #if defined(USE_CUDA) || defined(USE_ROCM) // double support only implemented in CUDA/ROCM kernel -TEST(TransposeMatMulOpTest, DoubleTypeNoTranspose) { - RunFusedMatMulTest("TransposeMatMul", 1); -} TEST(FusedMatMulOpTest, DoubleTypeNoTranspose) { RunFusedMatMulTest("FusedMatMul", 1); @@ -207,17 +204,6 @@ TEST(FusedMatMulOpTest, FloatTypeScale) { RunFusedMatMulTest("FusedMatMul", 1, true, true, 4.0f, true); } -TEST(TransposeMatMulOpTest, FloatTypeScale) { - RunFusedMatMulTest("TransposeMatMul", 1, false, false, 0.5f); - RunFusedMatMulTest("TransposeMatMul", 1, true, false, 2.0f); - RunFusedMatMulTest("TransposeMatMul", 1, true, true, 4.0f); - - // now run tests with b constant. 
- RunFusedMatMulTest("TransposeMatMul", 1, false, false, 0.5f, true); - RunFusedMatMulTest("TransposeMatMul", 1, true, false, 2.0f, true); - RunFusedMatMulTest("TransposeMatMul", 1, true, true, 4.0f, true); -} - } // namespace transpose_matmul } // namespace test } // namespace onnxruntime diff --git a/tools/python/util/ort_format_model/operator_type_usage_processors.py b/tools/python/util/ort_format_model/operator_type_usage_processors.py index 716fd4c765..ff9b6d7c3c 100644 --- a/tools/python/util/ort_format_model/operator_type_usage_processors.py +++ b/tools/python/util/ort_format_model/operator_type_usage_processors.py @@ -263,7 +263,7 @@ def _create_operator_type_usage_processors(): # ai.onnx: If, Loop, Reshape, Scan, Shape, Squeeze, Unsqueeze # com.microsoft: DynamicQuantizeMatMul, MatMulIntegerToFloat # - Only one type supported in the ORT implementation: - # com.microsoft: FusedConv, FusedGemm, FusedMatMul, TransposeMatMul + # com.microsoft: FusedConv, FusedGemm, FusedMatMul # - Implementation does not have any significant type specific code: # ai.onnx: Concat, Flatten, Not, QLinearConv, Reshape, Shape, Squeeze, Unsqueeze #