Fix MaxPool when using dilation > 1 plus non-zero padding (#1320)

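MaxPool with dilation > 1 and non-zero padding computed an incorrect window start index: the start was clamped to 0, which shifted the dilated sampling positions instead of skipping the ones that fall inside the padding. The window is now iterated from the unclamped start and each position is bounds-checked before it is read. Added 1-D, 2-D and 3-D test cases to cover this.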
2026-07-12 17:57:38 +00:00 · 2019-07-17 17:33:29 -07:00 · 2019-07-17 17:33:29 -07:00 · f47f6fd020
commit f47f6fd020
parent fbdd905440
3 changed files with 147 additions and 61 deletions
--- a/onnxruntime/core/providers/cpu/nn/pool.cc
+++ b/onnxruntime/core/providers/cpu/nn/pool.cc
@ -3,7 +3,7 @@

 #include "core/framework/op_kernel_context_internal.h"
 #include "core/providers/cpu/nn/pool.h"
-#include <cmath>
+
 using namespace ::onnxruntime::common;

 namespace onnxruntime {
@ -25,7 +25,7 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
  }

  std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
-  Tensor* Y = context->Output(0, TensorShape(output_dims));
+  Tensor* Y = context->Output(0, output_dims);

  const auto* X_data = X->template Data<float>();
  auto* Y_data = Y->template MutableData<float>();
@ -185,7 +185,7 @@ Status PoolBase::Compute(OpKernelContext* context, MLAS_POOLING_KIND kind) const

  std::vector<int64_t> pads = pads_;
  std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
-  Tensor* Y = context->Output(0, TensorShape(output_dims));
+  Tensor* Y = context->Output(0, output_dims);

  // Get access to the internal threadpool
  // Temporarily derive concurrency parameters without access to session state
@ -222,8 +222,9 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
  // and also if dilation is not required

  bool need_dilation = false;
-  for (auto n : dilations_)
+  for (auto n : dilations_) {
    need_dilation |= n > 1;
+  }

  if (OpKernel::Node().OutputDefs().size() == 1 && !need_dilation) {
    return PoolBase::Compute(context, MlasMaximumPooling);
@ -238,8 +239,8 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
  std::vector<int64_t> kernel_shape = kernel_shape_;

  std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
-  Tensor* Y = context->Output(0, TensorShape(output_dims));
-  Tensor* I = context->Output(1, TensorShape(output_dims));
+  Tensor* Y = context->Output(0, output_dims);
+  Tensor* I = context->Output(1, output_dims);

  const auto* X_data = X->template Data<float>();
  auto* Y_data = Y->template MutableData<float>();
@ -270,14 +271,15 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
        int64_t* i_d = I_data ? I_data + c * y_step : nullptr;
        for (int64_t ph = 0; ph < pooled_height; ++ph) {
          int64_t hstart = ph * stride_h() - pads[0];
-          int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
-          hstart = std::max(hstart, static_cast<int64_t>(0));
+          int64_t hend = hstart + kernel_shape[0] * dilation_h;
          float Yh = std::numeric_limits<float>::lowest();
          int64_t h_index = -1;
          for (int64_t h = hstart; h < hend; h += dilation_h) {
-            if (x_d[h] > Yh) {
-              Yh = x_d[h];
-              h_index = h;
+            if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
+              if (x_d[h] > Yh) {
+                Yh = x_d[h];
+                h_index = h;
+              }
            }
          }
          y_d[ph] = Yh;
@ -305,23 +307,25 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co

        for (int64_t ph = 0; ph < pooled_height; ++ph) {
          int64_t hstart = ph * stride_h() - pads[0];
-          int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
-          hstart = std::max(hstart, static_cast<int64_t>(0));
+          int64_t hend = hstart + kernel_shape[0] * dilation_h;
          for (int64_t pw = 0; pw < pooled_width; ++pw) {
            int64_t wstart = pw * stride_w() - pads[1];
-            int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
-            wstart = std::max(wstart, static_cast<int64_t>(0));
+            int64_t wend = wstart + kernel_shape[1] * dilation_w;
            const int64_t pool_index = ph * pooled_width + pw;
            float Yh = std::numeric_limits<float>::lowest();
            int64_t h_index = -1;
            int64_t w_index = -1;
            for (int64_t h = hstart; h < hend; h += dilation_h) {
-              for (int64_t w = wstart; w < wend; w += dilation_w) {
-                const int64_t input_index = h * width + w;
-                if (x_d[input_index] > Yh) {
-                  Yh = x_d[input_index];
-                  h_index = h;
-                  w_index = w;
+              if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
+                for (int64_t w = wstart; w < wend; w += dilation_w) {
+                  if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
+                    const int64_t input_index = h * width + w;
+                    if (x_d[input_index] > Yh) {
+                      Yh = x_d[input_index];
+                      h_index = h;
+                      w_index = w;
+                    }
+                  }
                }
              }
            }
@ -353,16 +357,13 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co

        for (int64_t ph = 0; ph < pooled_height; ++ph) {
          int64_t hstart = ph * stride_h() - pads[0];
-          int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
-          hstart = std::max(hstart, static_cast<int64_t>(0));
+          int64_t hend = hstart + kernel_shape[0] * dilation_h;
          for (int64_t pw = 0; pw < pooled_width; ++pw) {
            int64_t wstart = pw * stride_w() - pads[1];
-            int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
-            wstart = std::max(wstart, static_cast<int64_t>(0));
+            int64_t wend = wstart + kernel_shape[1] * dilation_w;
            for (int64_t pd = 0; pd < pooled_depth; ++pd) {
              int64_t dstart = pd * stride_d() - pads[2];
-              int64_t dend = std::min(dstart + kernel_shape[2] * dilation_d - dilation_d + 1, depth);
-              dstart = std::max(dstart, static_cast<int64_t>(0));
+              int64_t dend = dstart + kernel_shape[2] * dilation_d;
              const int64_t pool_index =
                  ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
              float Yh = std::numeric_limits<float>::lowest();
@ -370,14 +371,20 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
              int64_t w_index = -1;
              int64_t d_index = -1;
              for (int64_t h = hstart; h < hend; h += dilation_h) {
-                for (int64_t w = wstart; w < wend; w += dilation_w) {
-                  for (int64_t d = dstart; d < dend; d += dilation_d) {
-                    const int64_t input_index = h * width * depth + w * depth + d;
-                    if (x_d[input_index] > Yh) {
-                      Yh = x_d[input_index];
-                      h_index = h;
-                      w_index = w;
-                      d_index = d;
+                if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
+                  for (int64_t w = wstart; w < wend; w += dilation_w) {
+                    if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
+                      for (int64_t d = dstart; d < dend; d += dilation_d) {
+                        if (math::is_a_ge_zero_and_a_lt_b(d, depth)) {
+                          const int64_t input_index = h * width * depth + w * depth + d;
+                          if (x_d[input_index] > Yh) {
+                            Yh = x_d[input_index];
+                            h_index = h;
+                            w_index = w;
+                            d_index = d;
+                          }
+                        }
+                      }
                    }
                  }
                }
--- a/onnxruntime/core/providers/cpu/nn/pool_base.h
+++ b/onnxruntime/core/providers/cpu/nn/pool_base.h
@ -7,6 +7,7 @@
 #include "core/common/common.h"
 #include "core/framework/op_kernel.h"
 #include "core/providers/cpu/nn/autopad_type.h"
+#include "core/util/math.h"
 #include "core/mlas/inc/mlas.h"

 namespace onnxruntime {
--- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
@ -51,7 +51,7 @@ TEST(PoolTest, MaxPool) {

  test.AddInput<float>("X", x_dims, x_vals);
  test.AddOutput<float>("Y", expected_dims, expected_vals);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: result differs
 }

 // Only CUDA kernel has float 16 support
@ -104,11 +104,11 @@ TEST(PoolTest, MaxPool_F16) {

  test.AddInput<MLFloat16>("X", x_dims, f_X);
  test.AddOutput<MLFloat16>("Y", expected_dims, f_Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  //TensorRT: Assertion `!attrs.count("pads")' failed
 }
 #endif

-static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
+static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order = 0) {
  OpTester test("MaxPool", 8);

  test.AddAttribute("auto_pad", "");
@ -160,7 +160,7 @@ static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
 }

 TEST(PoolTest, MaxPool_8_With_Index) {
-  MaxPool_8_WithIndexTest(false);  // row major
+  MaxPool_8_WithIndexTest(false);                      // row major
  MaxPool_8_WithIndexTest(true, 0 /*storage_order*/);  // row major
  MaxPool_8_WithIndexTest(true, 1 /*storage_order*/);  // col major
 }
@ -229,6 +229,26 @@ TEST(PoolTest, MaxPool_10_Dilation_1d) {
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }

+TEST(PoolTest, MaxPool_10_DilationPadding_1d) {
+  OpTester test("MaxPool", 10);
+
+  test.AddAttribute("auto_pad", "");
+  test.AddAttribute("strides", std::vector<int64_t>{1});
+  test.AddAttribute("pads", vector<int64_t>{1, 1});
+  test.AddAttribute("kernel_shape", vector<int64_t>{3});
+  test.AddAttribute("dilations", vector<int64_t>{3});
+
+  std::vector<float> x_vals = {
+      1, 3, 2, 4, -1, -3, -2, -4, -6, -5, -4, -2};
+  std::vector<int64_t> x_dims = {1, 1, 12};
+  std::vector<int64_t> expected_dims = {1, 1, 8};
+  std::vector<float> expected_vals = {2, 4, 3, 2, 4, -1, -2, -2};
+
+  test.AddInput<float>("X", x_dims, x_vals);
+  test.AddOutput<float>("Y", expected_dims, expected_vals);
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+}
+
 TEST(PoolTest, MaxPool_10_Dilation_2d) {
  OpTester test("MaxPool", 10);

@ -239,11 +259,10 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
  test.AddAttribute("dilations", vector<int64_t>{2, 2});

  std::vector<float> x_vals = {
-	  1,  3,  2,  4, -1,
-	  5,  7,  6,  8, -2,
-	  9,  11, 10, 12, -3,
-	  13, 15, 14, 16, -4,
-      };
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4};
  std::vector<int64_t> x_dims = {1, 1, 4, 5};
  std::vector<int64_t> expected_dims = {1, 1, 2, 3};
  std::vector<float> expected_vals = {10, 12, 10, 14, 16, 14};
@ -253,6 +272,33 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }

+TEST(PoolTest, MaxPool_10_DilationPadding_2d) {
+  OpTester test("MaxPool", 10);
+
+  test.AddAttribute("auto_pad", "");
+  test.AddAttribute("strides", std::vector<int64_t>{1, 1});
+  test.AddAttribute("pads", vector<int64_t>{1, 1, 1, 1});
+  test.AddAttribute("kernel_shape", vector<int64_t>{2, 2});
+  test.AddAttribute("dilations", vector<int64_t>{2, 2});
+
+  std::vector<float> x_vals = {
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4};
+  std::vector<int64_t> x_dims = {1, 1, 4, 5};
+  std::vector<int64_t> expected_dims = {1, 1, 4, 5};
+  std::vector<float> expected_vals = {
+      7, 6, 8, 6, 8,
+      11, 10, 12, 10, 12,
+      15, 14, 16, 14, 16,
+      11, 10, 12, 10, 12};
+
+  test.AddInput<float>("X", x_dims, x_vals);
+  test.AddOutput<float>("Y", expected_dims, expected_vals);
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+}
+
 TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
  OpTester test("MaxPool", 10);

@ -263,11 +309,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
  test.AddAttribute("dilations", vector<int64_t>{2, 2});

  std::vector<float> x_vals = {
-	  1,  3,  2,  4, -1,
-	  5,  7,  6,  8, -2,
-	  9,  11, 10, 12, -3,
-	  13, 15, 14, 16, -4,
-      };
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4};
  std::vector<int64_t> x_dims = {1, 1, 4, 5};
  std::vector<int64_t> expected_dims = {1, 1, 1, 3};
  std::vector<float> expected_vals = {10, 12, 10};
@ -288,11 +333,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
  test.AddAttribute("ceil_mode", (int64_t)1);

  std::vector<float> x_vals = {
-	  1,  3,  2,  4, -1,
-	  5,  7,  6,  8, -2,
-	  9,  11, 10, 12, -3,
-	  13, 15, 14, 16, -4,
-      };
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4};
  std::vector<int64_t> x_dims = {1, 1, 4, 5};
  std::vector<int64_t> expected_dims = {1, 1, 2, 3};
  std::vector<float> expected_vals = {10, 12, 10, 10, 12, 10};
@ -302,6 +346,41 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }

+TEST(PoolTest, MaxPool_10_DilationPadding_3d) {
+  OpTester test("MaxPool", 10);
+
+  test.AddAttribute("auto_pad", "");
+  test.AddAttribute("strides", std::vector<int64_t>{1, 1, 1});
+  test.AddAttribute("pads", vector<int64_t>{1, 1, 1, 1, 1, 1});
+  test.AddAttribute("kernel_shape", vector<int64_t>{2, 2, 2});
+  test.AddAttribute("dilations", vector<int64_t>{2, 2, 2});
+
+  std::vector<float> x_vals = {
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4,
+      1, 3, 2, 4, -1,
+      5, 7, 6, 8, -2,
+      9, 11, 10, 12, -3,
+      13, 15, 14, 16, -4};
+  std::vector<int64_t> x_dims = {1, 1, 2, 4, 5};
+  std::vector<int64_t> expected_dims = {1, 1, 2, 4, 5};
+  std::vector<float> expected_vals = {
+      7, 6, 8, 6, 8,
+      11, 10, 12, 10, 12,
+      15, 14, 16, 14, 16,
+      11, 10, 12, 10, 12,
+      7, 6, 8, 6, 8,
+      11, 10, 12, 10, 12,
+      15, 14, 16, 14, 16,
+      11, 10, 12, 10, 12};
+
+  test.AddInput<float>("X", x_dims, x_vals);
+  test.AddOutput<float>("Y", expected_dims, expected_vals);
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+}
+
 TEST(PoolTest, GlobalMaxPool) {
  OpTester test("GlobalMaxPool");

@ -566,17 +645,16 @@ TEST(PoolTest, AveragePool_10_ceil1_2d) {
  test.AddAttribute("strides", std::vector<int64_t>{3, 1});
  test.AddAttribute("pads", vector<int64_t>{0, 0, 0, 0});
  test.AddAttribute("kernel_shape", vector<int64_t>{2, 2});
-  test.AddAttribute("ceil_mode", (int64_t) 1);
+  test.AddAttribute("ceil_mode", (int64_t)1);

  std::vector<float> x_vals = {
-	  1,  3,  2,  4,
-	  5,  7,  6,  8,
-	  9,  11, 10, 12,
-	  13, 15, 14, 16,
-      };
+      1, 3, 2, 4,
+      5, 7, 6, 8,
+      9, 11, 10, 12,
+      13, 15, 14, 16};
  std::vector<int64_t> x_dims = {1, 1, 4, 4};
  std::vector<int64_t> expected_dims = {1, 1, 2, 3};
-  std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f , 14.0f, 14.5f, 15.0f};
+  std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f, 14.0f, 14.5f, 15.0f};

  test.AddInput<float>("X", x_dims, x_vals);
  test.AddOutput<float>("Y", expected_dims, expected_vals);