Merge remote-tracking branch 'upstream/master' into DmlDev

2026-06-30 03:37:44 +00:00 · 2020-07-22 02:24:48 +00:00 · 2020-07-22 02:24:48 +00:00 · 42d0ad8fec
commit 42d0ad8fec
parent 8829d60f94 bf78e4d18b
14 changed files with 148 additions and 64 deletions
--- a/onnxruntime/contrib_ops/cpu/matmul_integer16.cc
+++ b/onnxruntime/contrib_ops/cpu/matmul_integer16.cc
@ -28,6 +28,10 @@ Status MatMulInteger16<int16_t, int16_t, int32_t>::Compute(OpKernelContext* ctx)
  ORT_RETURN_IF_ERROR(helper.Compute(A->Shape(), B->Shape()));
  Tensor* Y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0)
+    return Status::OK();
+
  for (int i = 0; i < static_cast<int>(helper.OutputOffsets().size()); i++) {
    EigenCastGEMM<int16_t, int16_t, int32_t>(
        A->template Data<int16_t>() + helper.LeftOffsets()[i],
--- a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc
+++ b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc
@ -83,6 +83,10 @@ Status MatMulIntegerToFloatBase::ComputeCommon(OpKernelContext* ctx,
  ORT_RETURN_IF_ERROR(helper.Compute(a_shape, b->Shape()));
  Tensor* y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0)
+    return Status::OK();
+
  const auto* b_data = static_cast<const uint8_t*>(b->DataRaw());
  const bool b_is_signed = b->IsDataType<int8_t>();
  auto* y_data = y->template MutableData<float>();
--- a/onnxruntime/contrib_ops/cpu/transpose_matmul.cc
+++ b/onnxruntime/contrib_ops/cpu/transpose_matmul.cc
@ -37,6 +37,10 @@ Status TransposeMatMul::Compute(OpKernelContext* context) const {

  Tensor* Y = context->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0)
+    return Status::OK();
+
  const size_t num_offsets = helper.OutputOffsets().size();
  for (size_t i = 0; i < num_offsets; ++i) {
    math::Gemm<float, concurrency::ThreadPool>(
--- a/onnxruntime/core/providers/cpu/math/matmul.cc
+++ b/onnxruntime/core/providers/cpu/math/matmul.cc
@ -72,6 +72,10 @@ Status MatMul<T>::Compute(OpKernelContext* ctx) const {
  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape()));
  Tensor* y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0)
+    return Status::OK();
+
  // Using DataRaw as int32_t/uint32_t and int64_t/uint64_t share a common
  // operator body.
  const auto* a_data = reinterpret_cast<const T*>(a->DataRaw());
--- a/onnxruntime/core/providers/cpu/math/matmul_integer.cc
+++ b/onnxruntime/core/providers/cpu/math/matmul_integer.cc
@ -38,6 +38,10 @@ Status MatMulInteger::Compute(OpKernelContext* ctx) const {
  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape()));
  Tensor* y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0)
+    return Status::OK();
+
  // validate zero points
  uint8_t a_offset = 0;
  uint8_t b_offset = 0;
--- a/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc
+++ b/onnxruntime/core/providers/cpu/math/quantize_linear_matmul.cc
@ -38,6 +38,10 @@ Status QLinearMatMul::Compute(OpKernelContext* ctx) const {
  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape()));
  Tensor* y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0)
+    return Status::OK();
+
  // validate offsets
  const auto* a_offset = ctx->Input<Tensor>(2);
  const auto* b_offset = ctx->Input<Tensor>(5);
--- a/onnxruntime/core/providers/cuda/math/matmul.cc
+++ b/onnxruntime/core/providers/cuda/math/matmul.cc
@ -92,6 +92,10 @@ Status MatMul<T>::ComputeInternal(OpKernelContext* ctx) const {

  Tensor* Y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0)
+    return Status::OK();
+
  CudaT one = ToCudaType<T>::FromFloat(1.0f);
  CudaT zero = ToCudaType<T>::FromFloat(0.0f);

--- a/onnxruntime/core/providers/cuda/math/matmul_integer.cc
+++ b/onnxruntime/core/providers/cuda/math/matmul_integer.cc
@ -36,6 +36,10 @@ Status MatMulInteger<int8_t, int8_t>::ComputeInternal(OpKernelContext* ctx) cons
  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b->Shape()));
  Tensor* Y = ctx->Output(0, helper.OutputShape());

+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0)
+    return Status::OK();
+
  const int8_t* a_ptr = a->template Data<int8_t>();
  const int8_t* b_ptr = b->template Data<int8_t>();
  int32_t* output_ptr = Y->template MutableData<int32_t>();
--- a/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc
+++ b/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc
@ -89,6 +89,13 @@ TEST(DynamicQuantizeMatMul, UInt8_test) {
  TestDynamicQuantizeMatMul<uint8_t>(A_dims, B_dims, "testdata/dynamic_quantize_matmul_uint8.onnx");
 }

+// Calls TestDynamicQuantizeMatMul<uint8_t> with a zero-row A ({0, 128}) and a {128, 128} B.
+TEST(DynamicQuantizeMatMul, UInt8_test_with_empty_input) {
+  std::vector<int64_t> A_dims{0, 128};
+  std::vector<int64_t> B_dims{128, 128};
+
+  TestDynamicQuantizeMatMul<uint8_t>(A_dims, B_dims, "testdata/dynamic_quantize_matmul_uint8.onnx");
+}
 TEST(DynamicQuantizeMatMul, UInt8_test_bias) {
  std::vector<int64_t> A_dims{4, 128};
  std::vector<int64_t> B_dims{128, 128};
--- a/onnxruntime/test/contrib_ops/matmul_integer16_test.cc
+++ b/onnxruntime/test/contrib_ops/matmul_integer16_test.cc
@ -27,13 +27,21 @@ TEST(MatmulInteger16OpTest, MatMulInteger16_2) {
  test.Run();
 }

+TEST(MatmulInteger16OpTest, MatMulInteger16_Empty_input) {
+  OpTester test("MatMulInteger16", 1, onnxruntime::kMSDomain);
+  test.AddInput<int16_t>("T1", {0, 2}, {});  // zero rows, so no data values
+  test.AddInput<int16_t>("T2", {2, 1}, {-8, -11});
+  test.AddOutput<int32_t>("T3", {0, 1}, {});  // {0, 2} x {2, 1} -> expected output is empty as well
+  test.Run();
+}
 TEST(MatmulInteger16OpTest, MatMulInteger16_3) {
  OpTester test("MatMulInteger16", 1, onnxruntime::kMSDomain);
  test.AddInput<int16_t>("T1", {3, 2}, {-7, 10, 10, -1113, 22, -356});
  test.AddInput<int16_t>("T2", {2, 4}, {-8, -11, 13, 14, -99, 1234, 321, -6});
-  test.AddOutput<int32_t>("T3", {3, 4}, {-934, 12417, 3119, -158,
-                                         110107, -1373552, -357143, 6818,
-                                         35068, -439546, -113990, 2444});
+  test.AddOutput<int32_t>("T3", {3, 4},
+                          {-934, 12417, 3119, -158,
+                           110107, -1373552, -357143, 6818,
+                           35068, -439546, -113990, 2444});
  test.Run();
 }

--- a/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc
+++ b/onnxruntime/test/contrib_ops/transpose_matmul_op_test.cc
@ -84,6 +84,13 @@ std::vector<MatMulTestData<T>> GenerateSimpleTestCases() {
       {1, 2, 4},
       {20, 23, 26, 29, 56, 68, 80, 92}});

+  test_cases.push_back(
+      {"test 2D with empty input",
+       {0, 3},
+       {3, 4},
+       {0, 4},
+       {}});
+
  return test_cases;
 }

--- a/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc
+++ b/onnxruntime/test/providers/cpu/math/matmul_integer_test.cc
@ -26,6 +26,15 @@ TEST(MatmulIntegerOpTest, MatMulInteger_2D) {
  test.Run();
 }

+TEST(MatmulIntegerOpTest, MatMulInteger_2D_empty_input) {
+  OpTester test("MatMulInteger", 10);
+  test.AddInput<uint8_t>("T1", {0, 3}, {});  // zero rows, so no data values
+  test.AddInput<uint8_t>("T2", {3, 2}, {1, 4, 2, 5, 3, 6});
+  test.AddInput<uint8_t>("a_zero_point", {}, {12});  // empty dims with a single value
+  test.AddInput<uint8_t>("b_zero_point", {}, {0});
+  test.AddOutput<int32_t>("T3", {0, 2}, {});  // {0, 3} x {3, 2} -> expected output is empty as well
+  test.Run();
+}
 TEST(MatmulIntegerOpTest, MatMulInteger) {
  OpTester test("MatMulInteger", 10);
  test.AddInput<uint8_t>("T1", {1, 1}, {11});
--- a/onnxruntime/test/providers/cpu/math/matmul_test.cc
+++ b/onnxruntime/test/providers/cpu/math/matmul_test.cc
@ -17,79 +17,84 @@ struct MatMulTestData {
 };

 template <typename T>
-std::vector<MatMulTestData<T>> GenerateTestCases()
-{
+std::vector<MatMulTestData<T>> GenerateTestCases() {
  std::vector<MatMulTestData<T>> test_cases;

  test_cases.push_back(
-    {"test padding and broadcast",
-    {3, 1, 1, 2},
-    {2, 2, 2},
-    {3, 2, 1, 2},
-    {2, 3, 6, 7, 6, 11, 26, 31, 10, 19, 46, 55}});
+      {"test padding and broadcast",
+       {3, 1, 1, 2},
+       {2, 2, 2},
+       {3, 2, 1, 2},
+       {2, 3, 6, 7, 6, 11, 26, 31, 10, 19, 46, 55}});

  test_cases.push_back(
-    {"test padding and broadcast",
-    {2, 3, 2},
-    {3, 2, 2, 1},
-    {3, 2, 3, 1},
-    {1, 3, 5, 33, 43, 53, 5, 23, 41, 85, 111, 137, 9, 43, 77, 137, 179, 221}});
+      {"test padding and broadcast",
+       {2, 3, 2},
+       {3, 2, 2, 1},
+       {3, 2, 3, 1},
+       {1, 3, 5, 33, 43, 53, 5, 23, 41, 85, 111, 137, 9, 43, 77, 137, 179, 221}});

  test_cases.push_back(
-    {"test left 1D",
-    {2},
-    {3, 2, 1},
-    {3, 1},
-    {1, 3, 5}});
+      {"test left 1D",
+       {2},
+       {3, 2, 1},
+       {3, 1},
+       {1, 3, 5}});

  test_cases.push_back(
-    {"test right 1D",
-    {3, 1, 2},
-    {2},
-    {3, 1},
-    {1, 3, 5}});
+      {"test right 1D",
+       {3, 1, 2},
+       {2},
+       {3, 1},
+       {1, 3, 5}});

  test_cases.push_back(
-    {"test scalar output",
-    {3},
-    {3},
-    {},
-    {5}});
+      {"test scalar output",
+       {3},
+       {3},
+       {},
+       {5}});

  test_cases.push_back(
-    {"test 2D",
-    {3, 4},
-    {4, 3},
-    {3, 3},
-    {42, 48, 54, 114, 136, 158, 186, 224, 262}});
+      {"test 2D",
+       {3, 4},
+       {4, 3},
+       {3, 3},
+       {42, 48, 54, 114, 136, 158, 186, 224, 262}});

  test_cases.push_back(
-    {"test 2D special",
-    {2, 2, 3},
-    {3, 4},
-    {2, 2, 4},
-    {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}});
+      {"test 2D special",
+       {2, 2, 3},
+       {3, 4},
+       {2, 2, 4},
+       {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}});

  test_cases.push_back(
-    {"test 2D special 2",
-    {2, 2, 3},
-    {1, 3, 4},
-    {2, 2, 4},
-    {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}});
+      {"test 2D special 2",
+       {2, 2, 3},
+       {1, 3, 4},
+       {2, 2, 4},
+       {20, 23, 26, 29, 56, 68, 80, 92, 92, 113, 134, 155, 128, 158, 188, 218}});

  test_cases.push_back(
-    {"test 2D special 3",
-    {2, 6},
-    {1, 1, 6, 1},
-    {1, 1, 2, 1},
-    {55, 145}});
+      {"test 2D special 3",
+       {2, 6},
+       {1, 1, 6, 1},
+       {1, 1, 2, 1},
+       {55, 145}});
+
+  test_cases.push_back(
+      {"test 2D empty input",
+       {3, 4},
+       {4, 0},
+       {3, 0},
+       {}});

  return test_cases;
 }

 template <typename T>
-void RunMatMulTest(int32_t opset_version = 7)
-{
+void RunMatMulTest(int32_t opset_version = 7) {
  std::vector<T> common_input_vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  for (auto t : GenerateTestCases<T>()) {
    OpTester test("MatMul", opset_version);
@ -105,7 +110,7 @@ void RunMatMulTest(int32_t opset_version = 7)
    test.AddOutput<T>("Y", t.expected_dims, t.expected_vals);

    // OpenVINO EP: Disabled temporarily matmul broadcasting not fully supported
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider,kOpenVINOExecutionProvider});// Disable TensorRT because of unsupported data type
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});  // Disable TensorRT because of unsupported data type
  }
 }

--- a/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc
+++ b/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc
@ -46,17 +46,33 @@ TEST(QuantizeLinearMatmulOpTest, QLinearMatMul3D) {
 }

 static void QLinearMatMul2DTest(bool only_t1_not_initializer) {
-  OpTester test("QLinearMatMul", 10);
-  test.AddInput<uint8_t>("T1", {2, 4}, {208, 236, 0, 238, 3, 214, 255, 29});
-  test.AddInput<float>("a_scale", {1}, {0.0066f}, only_t1_not_initializer);
-  test.AddInput<uint8_t>("a_zero_point", {1}, {113}, only_t1_not_initializer);
-  test.AddInput<uint8_t>("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer);
-  test.AddInput<float>("b_scale", {1}, {0.00705f}, only_t1_not_initializer);
-  test.AddInput<uint8_t>("b_zero_point", {1}, {114}, only_t1_not_initializer);
-  test.AddInput<float>("y_scale", {1}, {0.0107f}, only_t1_not_initializer);
-  test.AddInput<uint8_t>("y_zero_point", {1}, {118}, only_t1_not_initializer);
-  test.AddOutput<uint8_t>("T3", {2, 3}, {168, 115, 255, 1, 66, 151});
-  test.Run();
+  // Test non-empty inputs
+  OpTester test_non_empty("QLinearMatMul", 10);
+  test_non_empty.AddInput<uint8_t>("T1", {2, 4}, {208, 236, 0, 238, 3, 214, 255, 29});
+  test_non_empty.AddInput<float>("a_scale", {1}, {0.0066f}, only_t1_not_initializer);
+  test_non_empty.AddInput<uint8_t>("a_zero_point", {1}, {113}, only_t1_not_initializer);
+  test_non_empty.AddInput<uint8_t>("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer);
+  test_non_empty.AddInput<float>("b_scale", {1}, {0.00705f}, only_t1_not_initializer);
+  test_non_empty.AddInput<uint8_t>("b_zero_point", {1}, {114}, only_t1_not_initializer);
+  test_non_empty.AddInput<float>("y_scale", {1}, {0.0107f}, only_t1_not_initializer);
+  test_non_empty.AddInput<uint8_t>("y_zero_point", {1}, {118}, only_t1_not_initializer);
+  test_non_empty.AddOutput<uint8_t>("T3", {2, 3}, {168, 115, 255, 1, 66, 151});
+  test_non_empty.Run();
+
+  // Test with an empty input
+  OpTester test_empty("QLinearMatMul", 10);
+  test_empty.AddInput<uint8_t>("T1", {0, 4}, {});
+  test_empty.AddInput<float>("a_scale", {1}, {0.0066f}, only_t1_not_initializer);
+  test_empty.AddInput<uint8_t>("a_zero_point", {1}, {113}, only_t1_not_initializer);
+  test_empty.AddInput<uint8_t>("T2", {4, 3}, {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247}, only_t1_not_initializer);
+  test_empty.AddInput<float>("b_scale", {1}, {0.00705f}, only_t1_not_initializer);
+  test_empty.AddInput<uint8_t>("b_zero_point", {1}, {114}, only_t1_not_initializer);
+  test_empty.AddInput<float>("y_scale", {1}, {0.0107f}, only_t1_not_initializer);
+  test_empty.AddInput<uint8_t>("y_zero_point", {1}, {118}, only_t1_not_initializer);
+  test_empty.AddOutput<uint8_t>("T3", {0, 3}, {});
+
+  // Skip NNAPI as it doesn't support empty output for now
+  test_empty.Run(OpTester::ExpectResult::kExpectSuccess, "", {kNnapiExecutionProvider});
 }

 TEST(QuantizeLinearMatmulOpTest, QLinearMatMul) {