diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc index d37424c3ed..367a9256a0 100644 --- a/onnxruntime/core/providers/cpu/nn/pool.cc +++ b/onnxruntime/core/providers/cpu/nn/pool.cc @@ -3,7 +3,7 @@ #include "core/framework/op_kernel_context_internal.h" #include "core/providers/cpu/nn/pool.h" -#include + using namespace ::onnxruntime::common; namespace onnxruntime { @@ -25,7 +25,7 @@ Status Pool::Compute(OpKernelContext* context) const { } std::vector output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_); - Tensor* Y = context->Output(0, TensorShape(output_dims)); + Tensor* Y = context->Output(0, output_dims); const auto* X_data = X->template Data(); auto* Y_data = Y->template MutableData(); @@ -185,7 +185,7 @@ Status PoolBase::Compute(OpKernelContext* context, MLAS_POOLING_KIND kind) const std::vector pads = pads_; std::vector output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_); - Tensor* Y = context->Output(0, TensorShape(output_dims)); + Tensor* Y = context->Output(0, output_dims); // Get access to the internal threadpool // Temporarily derive concurrency parameters without access to session state @@ -222,8 +222,9 @@ Status Pool>::Compute(OpKernelContext* context) co // and also if dilation is not required bool need_dilation = false; - for (auto n : dilations_) + for (auto n : dilations_) { need_dilation |= n > 1; + } if (OpKernel::Node().OutputDefs().size() == 1 && !need_dilation) { return PoolBase::Compute(context, MlasMaximumPooling); @@ -238,8 +239,8 @@ Status Pool>::Compute(OpKernelContext* context) co std::vector kernel_shape = kernel_shape_; std::vector output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_); - Tensor* Y = context->Output(0, TensorShape(output_dims)); - Tensor* I = context->Output(1, TensorShape(output_dims)); + Tensor* Y = context->Output(0, output_dims); + Tensor* I = context->Output(1, output_dims); const auto* X_data = X->template Data(); auto* Y_data = Y->template MutableData(); @@ -270,14 +271,15 @@ Status Pool>::Compute(OpKernelContext* context) co int64_t* i_d = I_data ? I_data + c * y_step : nullptr; for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h() - pads[0]; - int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height); - hstart = std::max(hstart, static_cast(0)); + int64_t hend = hstart + kernel_shape[0] * dilation_h; float Yh = std::numeric_limits::lowest(); int64_t h_index = -1; for (int64_t h = hstart; h < hend; h += dilation_h) { - if (x_d[h] > Yh) { - Yh = x_d[h]; - h_index = h; + if (math::is_a_ge_zero_and_a_lt_b(h, height)) { + if (x_d[h] > Yh) { + Yh = x_d[h]; + h_index = h; + } } } y_d[ph] = Yh; @@ -305,23 +307,25 @@ Status Pool>::Compute(OpKernelContext* context) co for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h() - pads[0]; - int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height); - hstart = std::max(hstart, static_cast(0)); + int64_t hend = hstart + kernel_shape[0] * dilation_h; for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w() - pads[1]; - int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width); - wstart = std::max(wstart, static_cast(0)); + int64_t wend = wstart + kernel_shape[1] * dilation_w; const int64_t pool_index = ph * pooled_width + pw; float Yh = std::numeric_limits::lowest(); int64_t h_index = -1; int64_t w_index = -1; for (int64_t h = hstart; h < hend; h += dilation_h) { - for (int64_t w = wstart; w < wend; w += dilation_w) { - const int64_t input_index = h * width + w; - if (x_d[input_index] > Yh) { - Yh = x_d[input_index]; - h_index = h; - w_index = w; + if (math::is_a_ge_zero_and_a_lt_b(h, height)) { + for (int64_t w = wstart; w < wend; w += dilation_w) { + if (math::is_a_ge_zero_and_a_lt_b(w, width)) { + const int64_t input_index = h * width + w; + if (x_d[input_index] > Yh) { + Yh = x_d[input_index]; + h_index = h; + w_index = w; + } + } } } } @@ -353,16 +357,13 @@ Status Pool>::Compute(OpKernelContext* context) co for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h() - pads[0]; - int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height); - hstart = std::max(hstart, static_cast(0)); + int64_t hend = hstart + kernel_shape[0] * dilation_h; for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w() - pads[1]; - int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width); - wstart = std::max(wstart, static_cast(0)); + int64_t wend = wstart + kernel_shape[1] * dilation_w; for (int64_t pd = 0; pd < pooled_depth; ++pd) { int64_t dstart = pd * stride_d() - pads[2]; - int64_t dend = std::min(dstart + kernel_shape[2] * dilation_d - dilation_d + 1, depth); - dstart = std::max(dstart, static_cast(0)); + int64_t dend = dstart + kernel_shape[2] * dilation_d; const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd; float Yh = std::numeric_limits::lowest(); @@ -370,14 +371,20 @@ Status Pool>::Compute(OpKernelContext* context) co int64_t w_index = -1; int64_t d_index = -1; for (int64_t h = hstart; h < hend; h += dilation_h) { - for (int64_t w = wstart; w < wend; w += dilation_w) { - for (int64_t d = dstart; d < dend; d += dilation_d) { - const int64_t input_index = h * width * depth + w * depth + d; - if (x_d[input_index] > Yh) { - Yh = x_d[input_index]; - h_index = h; - w_index = w; - d_index = d; + if (math::is_a_ge_zero_and_a_lt_b(h, height)) { + for (int64_t w = wstart; w < wend; w += dilation_w) { + if (math::is_a_ge_zero_and_a_lt_b(w, width)) { + for (int64_t d = dstart; d < dend; d += dilation_d) { + if (math::is_a_ge_zero_and_a_lt_b(d, depth)) { + const int64_t input_index = h * width * depth + w * depth + d; + if (x_d[input_index] > Yh) { + Yh = x_d[input_index]; + h_index = h; + w_index = w; + d_index = d; + } + } + } } } } diff --git a/onnxruntime/core/providers/cpu/nn/pool_base.h b/onnxruntime/core/providers/cpu/nn/pool_base.h index 11b70ac364..43f81982dd 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_base.h +++ b/onnxruntime/core/providers/cpu/nn/pool_base.h @@ -7,6 +7,7 @@ #include "core/common/common.h" #include "core/framework/op_kernel.h" #include "core/providers/cpu/nn/autopad_type.h" +#include "core/util/math.h" #include "core/mlas/inc/mlas.h" namespace onnxruntime { diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index 27658f1cc7..73f0ca2eb9 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -51,7 +51,7 @@ TEST(PoolTest, MaxPool) { test.AddInput("X", x_dims, x_vals); test.AddOutput("Y", expected_dims, expected_vals); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs } // Only CUDA kernel has float 16 support @@ -104,11 +104,11 @@ TEST(PoolTest, MaxPool_F16) { test.AddInput("X", x_dims, f_X); test.AddOutput("Y", expected_dims, f_Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed } #endif -static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) { +static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order = 0) { OpTester test("MaxPool", 8); test.AddAttribute("auto_pad", ""); @@ -160,7 +160,7 @@ static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) { } TEST(PoolTest, MaxPool_8_With_Index) { - MaxPool_8_WithIndexTest(false); // row major + MaxPool_8_WithIndexTest(false); // row major MaxPool_8_WithIndexTest(true, 0 /*storage_order*/); // row major MaxPool_8_WithIndexTest(true, 1 /*storage_order*/); // col major } @@ -229,6 +229,26 @@ TEST(PoolTest, MaxPool_10_Dilation_1d) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } +TEST(PoolTest, MaxPool_10_DilationPadding_1d) { + OpTester test("MaxPool", 10); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{1}); + test.AddAttribute("pads", vector{1, 1}); + test.AddAttribute("kernel_shape", vector{3}); + test.AddAttribute("dilations", vector{3}); + + std::vector x_vals = { + 1, 3, 2, 4, -1, -3, -2, -4, -6, -5, -4, -2}; + std::vector x_dims = {1, 1, 12}; + std::vector expected_dims = {1, 1, 8}; + std::vector expected_vals = {2, 4, 3, 2, 4, -1, -2, -2}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider}); +} + TEST(PoolTest, MaxPool_10_Dilation_2d) { OpTester test("MaxPool", 10); @@ -239,11 +259,10 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) { test.AddAttribute("dilations", vector{2, 2}); std::vector x_vals = { - 1, 3, 2, 4, -1, - 5, 7, 6, 8, -2, - 9, 11, 10, 12, -3, - 13, 15, 14, 16, -4, - }; + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4}; std::vector x_dims = {1, 1, 4, 5}; std::vector expected_dims = {1, 1, 2, 3}; std::vector expected_vals = {10, 12, 10, 14, 16, 14}; @@ -253,6 +272,33 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } +TEST(PoolTest, MaxPool_10_DilationPadding_2d) { + OpTester test("MaxPool", 10); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("pads", vector{1, 1, 1, 1}); + test.AddAttribute("kernel_shape", vector{2, 2}); + test.AddAttribute("dilations", vector{2, 2}); + + std::vector x_vals = { + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4}; + std::vector x_dims = {1, 1, 4, 5}; + std::vector expected_dims = {1, 1, 4, 5}; + std::vector expected_vals = { + 7, 6, 8, 6, 8, + 11, 10, 12, 10, 12, + 15, 14, 16, 14, 16, + 11, 10, 12, 10, 12}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider}); +} + TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) { OpTester test("MaxPool", 10); @@ -263,11 +309,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) { test.AddAttribute("dilations", vector{2, 2}); std::vector x_vals = { - 1, 3, 2, 4, -1, - 5, 7, 6, 8, -2, - 9, 11, 10, 12, -3, - 13, 15, 14, 16, -4, - }; + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4}; std::vector x_dims = {1, 1, 4, 5}; std::vector expected_dims = {1, 1, 1, 3}; std::vector expected_vals = {10, 12, 10}; @@ -288,11 +333,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) { test.AddAttribute("ceil_mode", (int64_t)1); std::vector x_vals = { - 1, 3, 2, 4, -1, - 5, 7, 6, 8, -2, - 9, 11, 10, 12, -3, - 13, 15, 14, 16, -4, - }; + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4}; std::vector x_dims = {1, 1, 4, 5}; std::vector expected_dims = {1, 1, 2, 3}; std::vector expected_vals = {10, 12, 10, 10, 12, 10}; @@ -302,6 +346,41 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } +TEST(PoolTest, MaxPool_10_DilationPadding_3d) { + OpTester test("MaxPool", 10); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{1, 1, 1}); + test.AddAttribute("pads", vector{1, 1, 1, 1, 1, 1}); + test.AddAttribute("kernel_shape", vector{2, 2, 2}); + test.AddAttribute("dilations", vector{2, 2, 2}); + + std::vector x_vals = { + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4, + 1, 3, 2, 4, -1, + 5, 7, 6, 8, -2, + 9, 11, 10, 12, -3, + 13, 15, 14, 16, -4}; + std::vector x_dims = {1, 1, 2, 4, 5}; + std::vector expected_dims = {1, 1, 2, 4, 5}; + std::vector expected_vals = { + 7, 6, 8, 6, 8, + 11, 10, 12, 10, 12, + 15, 14, 16, 14, 16, + 11, 10, 12, 10, 12, + 7, 6, 8, 6, 8, + 11, 10, 12, 10, 12, + 15, 14, 16, 14, 16, + 11, 10, 12, 10, 12}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider}); +} + TEST(PoolTest, GlobalMaxPool) { OpTester test("GlobalMaxPool"); @@ -566,17 +645,16 @@ TEST(PoolTest, AveragePool_10_ceil1_2d) { test.AddAttribute("strides", std::vector{3, 1}); test.AddAttribute("pads", vector{0, 0, 0, 0}); test.AddAttribute("kernel_shape", vector{2, 2}); - test.AddAttribute("ceil_mode", (int64_t) 1); + test.AddAttribute("ceil_mode", (int64_t)1); std::vector x_vals = { - 1, 3, 2, 4, - 5, 7, 6, 8, - 9, 11, 10, 12, - 13, 15, 14, 16, - }; + 1, 3, 2, 4, + 5, 7, 6, 8, + 9, 11, 10, 12, + 13, 15, 14, 16}; std::vector x_dims = {1, 1, 4, 4}; std::vector expected_dims = {1, 1, 2, 3}; - std::vector expected_vals = {4.0f, 4.5f, 5.0f , 14.0f, 14.5f, 15.0f}; + std::vector expected_vals = {4.0f, 4.5f, 5.0f, 14.0f, 14.5f, 15.0f}; test.AddInput("X", x_dims, x_vals); test.AddOutput("Y", expected_dims, expected_vals);