Fix CUDA Reduce ops (#2268)

* Add some tests for Reduction ops * Exclude tensorrt for new tests * Fix bug in CUDA Reduce ops * Fix nit
2026-07-11 17:48:34 +00:00 · 2019-10-31 10:11:59 -07:00 · 2019-10-31 10:11:59 -07:00 · 4bcd8bfca1
commit 4bcd8bfca1
parent a5da5ff6f4
2 changed files with 149 additions and 3 deletions
--- a/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc
+++ b/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc
@ -75,6 +75,7 @@ Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, cudnnRe
  std::vector<int64_t> squeezed_output_dims;
  output_dims.reserve(input_dims.size());

+  // explicit 'axes' provided => reduce only on given axis values
  if (axes_.size() > 0) {
    output_dims = input_dims;
    for (auto reduced_axis : axes_) {
@ -88,6 +89,7 @@ Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, cudnnRe
      reduced[axis] = true;
    }
  } else {
+    // no axes provided (i.e.) default axes  => reduce on all dims
    for (auto dim : input_dims) {
      ORT_ENFORCE(keepdims_ || dim != 0,
                  "Can't reduce on dim with value of 0 if 'keepdims' is false. "
@ -99,12 +101,18 @@ Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, cudnnRe
  }

  if (keepdims_) {
+    // since keepdims is set, the final output dim is the same as the output_dims computed above
    squeezed_output_dims = output_dims;
-  } else {
+  } else if (axes_.size() > 0) {
+    // we are not going to keep the reduced dims, hence compute the final output dim accordingly
+    squeezed_output_dims.reserve(rank);  // even though we won't use the full capacity, it is better to reserve for peak possible usage
    for (size_t i = 0; i < rank; ++i) {
      if (!reduced[i])
        squeezed_output_dims.push_back(input_dims[i]);
    }
+  } else {
+    // 'axes' is empty and keepdims is false => we reduce on all axes AND drop all dims,
+    // so the result is just a scalar, we keep 'squeezed_output_dims' empty (i.e.) no-op
  }

  Tensor* Y = ctx->Output(0, TensorShape(squeezed_output_dims));
--- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
@ -77,7 +77,7 @@ TEST(ReductionOpTest, ReduceL1_default_axes_keepdims) {
  test.Run();
 }

-TEST(ReductionOpTest, ReduceL1_do_not_keepdims) {
+TEST(ReductionOpTest, ReduceL1_do_not_keep_dims) {
  OpTester test("ReduceL1");
  test.AddAttribute("axes", std::vector<int64_t>{2});
  test.AddAttribute("keepdims", (int64_t)0);
@ -94,7 +94,7 @@ TEST(ReductionOpTest, ReduceL1_do_not_keepdims) {
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
 }

-TEST(ReductionOpTest, ReduceL1_do_not_keepdims_2) {
+TEST(ReductionOpTest, ReduceL1_do_not_keep_dims_2) {
  OpTester test("ReduceL1");
  test.AddAttribute("axes", std::vector<int64_t>{0});
  test.AddAttribute("keepdims", (int64_t)0);
@ -169,6 +169,22 @@ TEST(ReductionOpTest, ReduceL2_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceL2_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceL2");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {1.0f, 2.0f,
+                        3.0f, 4.0f,
+
+                        5.0f, 6.0f,
+                        7.0f, 8.0f,
+
+                        9.0f, 10.0f,
+                        11.0f, 12.0f});
+  test.AddOutput<float>("reduced", {}, {25.49509757f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceL2_do_not_keepdims) {
  OpTester test("ReduceL2");
  test.AddAttribute("axes", std::vector<int64_t>{2});
@ -304,6 +320,22 @@ TEST(ReductionOpTest, ReduceLogSumExp_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceLogSumExp_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceLogSumExp");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {5.0f, 1.0f,
+                        20.0f, 2.0f,
+
+                        30.0f, 1.0f,
+                        40.0f, 2.0f,
+
+                        55.0f, 1.0f,
+                        60.0f, 2.0f});
+  test.AddOutput<float>("reduced", {}, {60.00671387f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceLogSumExp_do_not_keepdims) {
  OpTester test("ReduceLogSumExp");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -398,6 +430,22 @@ TEST(ReductionOpTest, ReduceMax_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceMax_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceMax");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {5.0f, 1.0f,
+                        20.0f, 2.0f,
+
+                        30.0f, 1.0f,
+                        40.0f, 2.0f,
+
+                        55.0f, 1.0f,
+                        60.0f, 2.0f});
+  test.AddOutput<float>("reduced", {}, {60.0f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceMax_do_not_keepdims) {
  OpTester test("ReduceMax");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -509,6 +557,22 @@ TEST(ReductionOpTest, ReduceMean_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceMean_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceMean");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {5.0f, 1.0f,
+                        20.0f, 2.0f,
+
+                        30.0f, 1.0f,
+                        40.0f, 2.0f,
+
+                        55.0f, 1.0f,
+                        60.0f, 2.0f});
+  test.AddOutput<float>("reduced", {}, {18.25f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceMean_do_not_keepdims) {
  OpTester test("ReduceMean");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -603,6 +667,32 @@ TEST(ReductionOpTest, ReduceMin_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceMin_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceMin");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {5.0f, 1.0f,
+                        20.0f, 2.0f,
+
+                        30.0f, 1.0f,
+                        40.0f, 2.0f,
+
+                        55.0f, 1.0f,
+                        60.0f, 2.0f});
+  test.AddOutput<float>("reduced", {}, {1.0f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
+TEST(ReductionOpTest, ReduceMin_default_axes_do_not_keep_dims_2D) {
+  OpTester test("ReduceMin");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {2, 2},
+                       {5.0f, 1.0f,
+                        20.0f, 2.0f});
+  test.AddOutput<float>("reduced", {}, {1.0f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceMin_do_not_keepdims) {
  OpTester test("ReduceMin");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -799,6 +889,22 @@ TEST(ReductionOpTest, ReduceSum_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceSum_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceSum");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {1.0f, 2.0f,
+                        3.0f, 4.0f,
+
+                        5.0f, 6.0f,
+                        7.0f, 8.0f,
+
+                        9.0f, 10.0f,
+                        11.0f, 12.0f});
+  test.AddOutput<float>("reduced", {}, {78.0f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceSum_do_not_keepdims) {
  OpTester test("ReduceSum");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -904,6 +1010,22 @@ TEST(ReductionOpTest, ReduceSumSquare_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceSumSquare_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceSumSquare");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {1.0f, 2.0f,
+                        3.0f, 4.0f,
+
+                        5.0f, 6.0f,
+                        7.0f, 8.0f,
+
+                        9.0f, 10.0f,
+                        11.0f, 12.0f});
+  test.AddOutput<float>("reduced", {}, {650.0f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceSumSquare_do_not_keepdims) {
  OpTester test("ReduceSumSquare");
  test.AddAttribute("axes", std::vector<int64_t>{1});
@ -963,6 +1085,22 @@ TEST(ReductionOpTest, ReduceProd_default_axes_keepdims) {
  test.Run();
 }

+TEST(ReductionOpTest, ReduceProd_default_axes_do_not_keep_dims) {
+  OpTester test("ReduceProd");
+  test.AddAttribute("keepdims", static_cast<int64_t>(0));
+  test.AddInput<float>("data", {3, 2, 2},
+                       {1.0f, 2.0f,
+                        3.0f, 4.0f,
+
+                        5.0f, 6.0f,
+                        7.0f, 8.0f,
+
+                        9.0f, 10.0f,
+                        11.0f, 12.0f});
+  test.AddOutput<float>("reduced", {}, {479001600.f});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: full reduce without keepDimensions is not supported with explicit batch
+}
+
 TEST(ReductionOpTest, ReduceProd_do_not_keepdims) {
  OpTester test("ReduceProd");
  test.AddAttribute("axes", std::vector<int64_t>{1});