From bf78e4d18b2f9a1ed6a91baa93f9d9d43578c832 Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Tue, 21 Jul 2020 17:57:47 -0700 Subject: [PATCH] Handle cases which produce an empty output in the MatMul op family (#4573) --- .../contrib_ops/cpu/matmul_integer16.cc | 4 + .../quantization/dynamic_quantize_matmul.cc | 4 + .../contrib_ops/cpu/transpose_matmul.cc | 4 + onnxruntime/core/providers/cpu/math/matmul.cc | 4 + .../core/providers/cpu/math/matmul_integer.cc | 4 + .../cpu/math/quantize_linear_matmul.cc | 4 + .../core/providers/cuda/math/matmul.cc | 4 + .../providers/cuda/math/matmul_integer.cc | 4 + .../dynamic_quantize_matmul_test.cc | 7 ++ .../test/contrib_ops/matmul_integer16_test.cc | 14 ++- .../contrib_ops/transpose_matmul_op_test.cc | 7 ++ .../providers/cpu/math/matmul_integer_test.cc | 9 ++ .../test/providers/cpu/math/matmul_test.cc | 105 +++++++++--------- .../cpu/math/quantize_linear_matmul_test.cc | 38 +++++-- 14 files changed, 148 insertions(+), 64 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/matmul_integer16.cc b/onnxruntime/contrib_ops/cpu/matmul_integer16.cc index 7378cd5651..d94914cf41 100644 --- a/onnxruntime/contrib_ops/cpu/matmul_integer16.cc +++ b/onnxruntime/contrib_ops/cpu/matmul_integer16.cc @@ -28,6 +28,10 @@ Status MatMulInteger16::Compute(OpKernelContext* ctx) ORT_RETURN_IF_ERROR(helper.Compute(A->Shape(), B->Shape())); Tensor* Y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (Y->Shape().Size() == 0) + return Status::OK(); + for (int i = 0; i < static_cast(helper.OutputOffsets().size()); i++) { EigenCastGEMM( A->template Data() + helper.LeftOffsets()[i], diff --git a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc index 7a6b3ae936..b36115a66b 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc @@ -83,6 +83,10 @@ Status MatMulIntegerToFloatBase::ComputeCommon(OpKernelContext* ctx, ORT_RETURN_IF_ERROR(helper.Compute(a_shape, b->Shape())); Tensor* y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (y->Shape().Size() == 0) + return Status::OK(); + const auto* b_data = static_cast(b->DataRaw()); const bool b_is_signed = b->IsDataType(); auto* y_data = y->template MutableData(); diff --git a/onnxruntime/contrib_ops/cpu/transpose_matmul.cc b/onnxruntime/contrib_ops/cpu/transpose_matmul.cc index 10b3e11588..fcf5d3b1c4 100644 --- a/onnxruntime/contrib_ops/cpu/transpose_matmul.cc +++ b/onnxruntime/contrib_ops/cpu/transpose_matmul.cc @@ -37,6 +37,10 @@ Status TransposeMatMul::Compute(OpKernelContext* context) const { Tensor* Y = context->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (Y->Shape().Size() == 0) + return Status::OK(); + const size_t num_offsets = helper.OutputOffsets().size(); for (size_t i = 0; i < num_offsets; ++i) { math::Gemm( diff --git a/onnxruntime/core/providers/cpu/math/matmul.cc b/onnxruntime/core/providers/cpu/math/matmul.cc index 6a66fd609f..66762b5b35 100644 --- a/onnxruntime/core/providers/cpu/math/matmul.cc +++ b/onnxruntime/core/providers/cpu/math/matmul.cc @@ -72,6 +72,10 @@ Status MatMul::Compute(OpKernelContext* ctx) const { ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape())); Tensor* y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (y->Shape().Size() == 0) + return Status::OK(); + // Using DataRaw as int32_t/uint32_t and int64_t/uint64_t share a common // operator body. const auto* a_data = reinterpret_cast(a->DataRaw()); diff --git a/onnxruntime/core/providers/cpu/math/matmul_integer.cc b/onnxruntime/core/providers/cpu/math/matmul_integer.cc index b1a23c317f..a36d3dd254 100644 --- a/onnxruntime/core/providers/cpu/math/matmul_integer.cc +++ b/onnxruntime/core/providers/cpu/math/matmul_integer.cc @@ -38,6 +38,10 @@ Status MatMulInteger::Compute(OpKernelContext* ctx) const { ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape())); Tensor* y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (y->Shape().Size() == 0) + return Status::OK(); + // validate zero points uint8_t a_offset = 0; uint8_t b_offset = 0; diff --git a/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc b/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc index 9d376f606c..faf71df08c 100644 --- a/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc +++ b/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc @@ -38,6 +38,10 @@ Status QLinearMatMul::Compute(OpKernelContext* ctx) const { ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape())); Tensor* y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (y->Shape().Size() == 0) + return Status::OK(); + // validate offsets const auto* a_offset = ctx->Input(2); const auto* b_offset = ctx->Input(5); diff --git a/onnxruntime/core/providers/cuda/math/matmul.cc b/onnxruntime/core/providers/cuda/math/matmul.cc index f32f1f52b4..bec6cdb364 100644 --- a/onnxruntime/core/providers/cuda/math/matmul.cc +++ b/onnxruntime/core/providers/cuda/math/matmul.cc @@ -92,6 +92,10 @@ Status MatMul::ComputeInternal(OpKernelContext* ctx) const { Tensor* Y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (Y->Shape().Size() == 0) + return Status::OK(); + CudaT one = ToCudaType::FromFloat(1.0f); CudaT zero = ToCudaType::FromFloat(0.0f); diff --git a/onnxruntime/core/providers/cuda/math/matmul_integer.cc b/onnxruntime/core/providers/cuda/math/matmul_integer.cc index be0cb953af..5c7cb81320 100644 --- a/onnxruntime/core/providers/cuda/math/matmul_integer.cc +++ b/onnxruntime/core/providers/cuda/math/matmul_integer.cc @@ -36,6 +36,10 @@ Status MatMulInteger::ComputeInternal(OpKernelContext* ctx) cons ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape())); Tensor* Y = ctx->Output(0, helper.OutputShape()); + // Bail out early if the output is going to be empty + if (Y->Shape().Size() == 0) + return Status::OK(); + const int8_t* a_ptr = a->template Data(); const int8_t* b_ptr = b->template Data(); int32_t* output_ptr = Y->template MutableData(); diff --git a/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc b/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc index dcc8fe7697..516f4c9f29 100644 --- a/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc +++ b/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc @@ -89,6 +89,13 @@ TEST(DynamicQuantizeMatMul, UInt8_test) { TestDynamicQuantizeMatMul(A_dims, B_dims, "testdata/dynamic_quantize_matmul_uint8.onnx"); } +TEST(DynamicQuantizeMatMul, UInt8_test_with_empty_input) { + std::vector A_dims{0, 128}; + std::vector B_dims{128, 128}; + std::vector Y_dims{0, 128}; + + TestDynamicQuantizeMatMul(A_dims, B_dims, "testdata/dynamic_quantize_matmul_uint8.onnx"); +} TEST(DynamicQuantizeMatMul, UInt8_test_bias) { std::vector A_dims{4, 128}; std::vector B_dims{128, 128}; diff --git a/onnxruntime/test/contrib_ops/matmul_integer16_test.cc b/onnxruntime/test/contrib_ops/matmul_integer16_test.cc index 6c4cc23960..a1032bc157 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer16_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer16_test.cc @@ -27,13 +27,21 @@ TEST(MatmulInteger16OpTest, MatMulInteger16_2) { test.Run(); } +TEST(MatmulInteger16OpTest, MatMulInteger16_Empty_input) { + OpTester test("MatMulInteger16", 1, onnxruntime::kMSDomain); + test.AddInput("T1", {0, 2}, {}); + test.AddInput("T2", {2, 1}, {-8, -11}); + test.AddOutput("T3", {0, 1}, {}); + test.Run(); +} TEST(MatmulInteger16OpTest, MatMulInteger16_3) { OpTester test("MatMulInteger16", 1, onnxruntime::kMSDomain); test.AddInput("T1", {3, 2}, {-7, 10, 10, -1113, 22, -356}); test.AddInput("T2", {2, 4}, {-8, -11, 13, 14, -99, 1234, 321, -6}); - test.AddOutput("T3", {3, 4}, {-934, 12417, 3119, -158, - 110107, -1373552, -357143, 6818, - 35068, -439546, -113990, 2444}); + test.AddOutput("T3", {3, 4}, + {-934, 12417, 3119, -158, + 110107, -1373552, -357143, 6818, + 35068, -439546, -113990, 2444}); test.Run(); } diff --git a/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc b/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc index d580b5f1d6..afe4f4f00d 100644 --- a/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc +++ b/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc @@ -84,6 +84,13 @@ std::vector> GenerateSimpleTestCases() { {1, 2, 4}, {20, 23, 26, 29, 56, 68, 80, 92}}); + test_cases.push_back( + {"test 2D with empty input", + {0, 3}, + {3, 4}, + {0, 4}, + {}}); + return test_cases; } diff --git a/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc b/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc index d5abeb5406..b54c04fe82 100644 --- a/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc +++ b/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc @@ -26,6 +26,15 @@ TEST(MatmulIntegerOpTest, MatMulInteger_2D) { test.Run(); } +TEST(MatmulIntegerOpTest, MatMulInteger_2D_empty_input) { + OpTester test("MatMulInteger", 10); + test.AddInput("T1", {0, 3}, {}); + test.AddInput("T2", {3, 2}, {1, 4, 2, 5, 3, 6}); + test.AddInput("a_zero_point", {}, {12}); + test.AddInput("b_zero_point", {}, {0}); + test.AddOutput("T3", {0, 2}, {}); + test.Run(); +} TEST(MatmulIntegerOpTest, MatMulInteger) { OpTester test("MatMulInteger", 10); test.AddInput("T1", {1, 1}, {11}); diff --git a/onnxruntime/test/providers/cpu/math/matmul_test.cc b/onnxruntime/test/providers/cpu/math/matmul_test.cc index 418dee24f0..0f813a63aa 100644 --- a/onnxruntime/test/providers/cpu/math/matmul_test.cc +++ b/onnxruntime/test/providers/cpu/math/matmul_test.cc @@ -17,79 +17,84 @@ struct MatMulTestData { }; template -std::vector> GenerateTestCases() -{ +std::vector> GenerateTestCases() { std::vector> test_cases; test_cases.push_back( - {"test padding and broadcast", - {3, 1, 1, 2}, - {2, 2, 2}, - {3, 2, 1, 2}, - {2, 3, 6, 7, 6, 11, 26, 31, 10, 19, 46, 55}}); + {"test padding and broadcast", + {3, 1, 1, 2}, + {2, 2, 2}, + {3, 2, 1, 2}, + {2, 3, 6, 7, 6, 11, 26, 31, 10, 19, 46, 55}}); test_cases.push_back( - {"test padding and broadcast", - {2, 3, 2}, - {3, 2, 2, 1}, - {3, 2, 3, 1}, - {1, 3, 5, 33, 43, 53, 5, 23, 41, 85, 111, 137, 9, 43, 77, 137, 179, 221}}); + {"test padding and broadcast", + {2, 3, 2}, + {3, 2, 2, 1}, + {3, 2, 3, 1}, + {1, 3, 5, 33, 43, 53, 5, 23, 41, 85, 111, 137, 9, 43, 77, 137, 179, 221}}); test_cases.push_back( - {"test left 1D", - {2}, - {3, 2, 1}, - {3, 1}, - {1, 3, 5}}); + {"test left 1D", + {2}, + {3, 2, 1}, + {3, 1}, + {1, 3, 5}}); test_cases.push_back( - {"test right 1D", - {3, 1, 2}, - {2}, - {3, 1}, - {1, 3, 5}}); + {"test right 1D", + {3, 1, 2}, + {2}, + {3, 1}, + {1, 3, 5}}); test_cases.push_back( - {"test scalar output", - {3}, - {3}, - {}, - {5}}); + {"test scalar output", + {3}, + {3}, + {}, + {5}}); test_cases.push_back( - {"test 2D", - {3, 4}, - {4, 3}, - {3, 3}, - {42, 48, 54, 114, 136, 158, 186, 224, 262}}); + {"test 2D", + {3, 4}, + {4, 3}, + {3, 3}, + {42, 48, 54, 114, 136, 158, 186, 224, 262}}); test_cases.push_back( - {"test 2D special", - {2, 2, 3}, - {3, 4}, - {2, 2, 4}, - {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}}); + {"test 2D special", + {2, 2, 3}, + {3, 4}, + {2, 2, 4}, + {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}}); test_cases.push_back( - {"test 2D special 2", - {2, 2, 3}, - {1, 3, 4}, - {2, 2, 4}, - {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}}); + {"test 2D special 2", + {2, 2, 3}, + {1, 3, 4}, + {2, 2, 4}, + {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}}); test_cases.push_back( - {"test 2D special 3", - {2, 6}, - {1, 1, 6, 1}, - {1, 1, 2, 1}, - {55, 145}}); + {"test 2D special 3", + {2, 6}, + {1, 1, 6, 1}, + {1, 1, 2, 1}, + {55, 145}}); + + test_cases.push_back( + {"test 2D empty input", + {3, 4}, + {4, 0}, + {3, 0}, + {}}); return test_cases; } template -void RunMatMulTest(int32_t opset_version = 7) -{ +void RunMatMulTest(int32_t opset_version = 7) { std::vector common_input_vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; for (auto t : GenerateTestCases()) { OpTester test("MatMul", opset_version); @@ -105,7 +110,7 @@ void RunMatMulTest(int32_t opset_version = 7) test.AddOutput("Y", t.expected_dims, t.expected_vals); // OpenVINO EP: Disabled temporarily matmul broadcasting not fully supported - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider,kOpenVINOExecutionProvider});// Disable TensorRT because of unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // Disable TensorRT because of unsupported data type } } diff --git a/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc b/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc index 32095a8a15..d6b17cf5e9 100644 --- a/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc +++ b/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc @@ -46,17 +46,33 @@ TEST(QuantizeLinearMatmulOpTest, QLinearMatMul3D) { } static void QLinearMatMul2DTest(bool only_t1_not_initializer) { - OpTester test("QLinearMatMul", 10); - test.AddInput("T1", {2, 4}, {208, 236, 0, 238, 3, 214, 255, 29}); - test.AddInput("a_scale", {1}, {0.0066f}, only_t1_not_initializer); - test.AddInput("a_zero_point", {1}, {113}, only_t1_not_initializer); - test.AddInput("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer); - test.AddInput("b_scale", {1}, {0.00705f}, only_t1_not_initializer); - test.AddInput("b_zero_point", {1}, {114}, only_t1_not_initializer); - test.AddInput("y_scale", {1}, {0.0107f}, only_t1_not_initializer); - test.AddInput("y_zero_point", {1}, {118}, only_t1_not_initializer); - test.AddOutput("T3", {2, 3}, {168, 115, 255, 1, 66, 151}); - test.Run(); + // Test non-empty inputs + OpTester test_non_empty("QLinearMatMul", 10); + test_non_empty.AddInput("T1", {2, 4}, {208, 236, 0, 238, 3, 214, 255, 29}); + test_non_empty.AddInput("a_scale", {1}, {0.0066f}, only_t1_not_initializer); + test_non_empty.AddInput("a_zero_point", {1}, {113}, only_t1_not_initializer); + test_non_empty.AddInput("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer); + test_non_empty.AddInput("b_scale", {1}, {0.00705f}, only_t1_not_initializer); + test_non_empty.AddInput("b_zero_point", {1}, {114}, only_t1_not_initializer); + test_non_empty.AddInput("y_scale", {1}, {0.0107f}, only_t1_not_initializer); + test_non_empty.AddInput("y_zero_point", {1}, {118}, only_t1_not_initializer); + test_non_empty.AddOutput("T3", {2, 3}, {168, 115, 255, 1, 66, 151}); + test_non_empty.Run(); + + // Test with an empty input + OpTester test_empty("QLinearMatMul", 10); + test_empty.AddInput("T1", {0, 4}, {}); + test_empty.AddInput("a_scale", {1}, {0.0066f}, only_t1_not_initializer); + test_empty.AddInput("a_zero_point", {1}, {113}, only_t1_not_initializer); + test_empty.AddInput("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer); + test_empty.AddInput("b_scale", {1}, {0.00705f}, only_t1_not_initializer); + test_empty.AddInput("b_zero_point", {1}, {114}, only_t1_not_initializer); + test_empty.AddInput("y_scale", {1}, {0.0107f}, only_t1_not_initializer); + test_empty.AddInput("y_zero_point", {1}, {118}, only_t1_not_initializer); + test_empty.AddOutput("T3", {0, 3}, {}); + + // Skip NNAPI as it doesn't support empty output for now + test_empty.Run(OpTester::ExpectResult::kExpectSuccess, "", {kNnapiExecutionProvider}); } TEST(QuantizeLinearMatmulOpTest, QLinearMatMul) {