From 8b1d3b3d57148afc41b788e9cdc6ec5033a08bc7 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 23 Jan 2025 17:35:11 -0800 Subject: [PATCH] Align AvgPool ceil_mode on last value to torch (#16752) Fix #16203 Previous to this PR, if `ceil_mode` is on, the calculation of a value would divide the kernel size, even if remaining pixels is less than the kernel size, which causes the difference in this operator between ORT and torch. However, this fix only applies to the change in #15597, which only supports AvgPool since 19. The older opset version is remain the same, as it's using mlas files. Also, the PR fixes the shape mismatch caused by sliding window starting from padding. More detail: https://github.com/onnx/onnx/pull/6650 (And this PR is also validated with the tests added in https://github.com/onnx/onnx/pull/6650) --- .../core/providers/cpu/nn/pool_attributes.h | 26 ++++++++++++------- .../core/providers/cpu/nn/pool_functors.h | 6 +++++ onnxruntime/test/onnx/TestCase.cc | 1 + .../test/providers/cpu/nn/pool_op_test.cc | 25 ++++++++++++++++++ 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/pool_attributes.h b/onnxruntime/core/providers/cpu/nn/pool_attributes.h index 118cb4a3ba..fbbd427375 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/pool_attributes.h @@ -150,14 +150,14 @@ struct PoolAttributes { case AutoPadType::VALID: *pad_head = 0; *pad_tail = 0; - *out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation); break; case AutoPadType::SAME_LOWER: { int64_t legacy_target_size = (in_size + stride - 1) / stride; int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = (pad_needed + 1) / 2; *pad_tail = pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } case AutoPadType::SAME_UPPER: { @@ -165,7 +165,7 @@ struct PoolAttributes { int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = pad_needed / 2; *pad_tail = pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } default: { @@ -173,7 +173,7 @@ struct PoolAttributes { } } } else { - *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); } } #if defined(_MSC_VER) && !defined(__clang__) @@ -184,13 +184,21 @@ struct PoolAttributes { int64_t ComputeOutputSize(int64_t in_size, int64_t stride, int64_t kernel, - int64_t pad_needed, + int64_t pad_head, + int64_t pad_tail, int64_t dilation) const { - if (ceil_mode == 0) { - return static_cast(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1); + int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1; + int64_t out_size = numerator / stride + 1; + + if (ceil_mode == 1) { + out_size = static_cast(std::ceil(static_cast(numerator) / stride)) + 1; + // Ensure that the last pooling starts inside the image (at least 1 pixel) + // Reference: https://github.com/onnx/onnx/pull/5741 + if ((out_size - 1) * stride >= in_size + pad_head) { + --out_size; + } } - return static_cast( - std::ceil(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1)); + return out_size; } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(pop) diff --git a/onnxruntime/core/providers/cpu/nn/pool_functors.h b/onnxruntime/core/providers/cpu/nn/pool_functors.h index d3205278b7..476a9a0338 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_functors.h +++ b/onnxruntime/core/providers/cpu/nn/pool_functors.h @@ -406,6 +406,7 @@ struct AveragePool1DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); y_d[ph] = 0; int total_elements = 0; for (int64_t h = hstart; h < hend; h += dilation_h) { @@ -461,9 +462,11 @@ struct AveragePool2DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); const int64_t pool_index = ph * pooled_width + pw; y_d[pool_index] = 0; int total_elements = 0; @@ -532,12 +535,15 @@ struct AveragePool3DTask { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); for (int64_t pd = 0; pd < pooled_depth; ++pd) { int64_t dstart = pd * stride_d - pads[2]; int64_t dend = dstart + kernel_shape[2] * dilation_d; + dend = std::min(dend, depth + pads[5]); const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd; y_d[pool_index] = 0; int total_elements = 0; diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index b9b69fdc74..d44f098db6 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -961,6 +961,7 @@ std::unique_ptr> GetBrokenTests(const std::string& provider {"reduce_prod_empty_set", "unknown version", {}}, {"reduce_sum_empty_set", "unknown version", {}}, {"reduce_sum_square_empty_set_expanded", "unknown version", {}}, + {"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."}, #ifdef ENABLE_TRAINING_CORE {"adagrad", "not a registered function/op", {}}, // Op not registered. {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index a340f975ec..24a8c8491b 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) { kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); } +TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) { + // TODO: Unskip when fixed #41968513 + if (DefaultDmlExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect."; + } + + OpTester test("AveragePool", 19); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{3}); + test.AddAttribute("pads", vector{3, 3}); + test.AddAttribute("kernel_shape", vector{7}); + test.AddAttribute("ceil_mode", (int64_t)1); + test.AddAttribute("count_include_pad", (int64_t)1); + + std::vector x_vals = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f}; + std::vector x_dims = {1, 2, 9}; + std::vector expected_dims = {1, 2, 4}; + std::vector expected_vals = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); +} + TEST(PoolTest, GlobalAveragePool) { OpTester test("GlobalAveragePool");