diff --git a/onnxruntime/core/providers/cpu/nn/pool_attributes.h b/onnxruntime/core/providers/cpu/nn/pool_attributes.h index 118cb4a3ba..fbbd427375 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/pool_attributes.h @@ -150,14 +150,14 @@ struct PoolAttributes { case AutoPadType::VALID: *pad_head = 0; *pad_tail = 0; - *out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation); break; case AutoPadType::SAME_LOWER: { int64_t legacy_target_size = (in_size + stride - 1) / stride; int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = (pad_needed + 1) / 2; *pad_tail = pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } case AutoPadType::SAME_UPPER: { @@ -165,7 +165,7 @@ struct PoolAttributes { int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = pad_needed / 2; *pad_tail = pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } default: { @@ -173,7 +173,7 @@ struct PoolAttributes { } } } else { - *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); } } #if defined(_MSC_VER) && !defined(__clang__) @@ -184,13 +184,21 @@ struct PoolAttributes { int64_t ComputeOutputSize(int64_t in_size, int64_t stride, int64_t kernel, - int64_t pad_needed, + int64_t pad_head, + int64_t pad_tail, int64_t dilation) const { - if (ceil_mode == 0) { - return static_cast(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1); + int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1; + int64_t out_size = numerator / stride + 1; + + if (ceil_mode == 1) { + out_size = static_cast(std::ceil(static_cast(numerator) / stride)) + 1; + // Ensure that the last pooling starts inside the image (at least 1 pixel) + // Reference: https://github.com/onnx/onnx/pull/5741 + if ((out_size - 1) * stride >= in_size + pad_head) { + --out_size; + } } - return static_cast( - std::ceil(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1)); + return out_size; } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(pop) diff --git a/onnxruntime/core/providers/cpu/nn/pool_functors.h b/onnxruntime/core/providers/cpu/nn/pool_functors.h index d3205278b7..476a9a0338 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_functors.h +++ b/onnxruntime/core/providers/cpu/nn/pool_functors.h @@ -406,6 +406,7 @@ struct AveragePool1DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); y_d[ph] = 0; int total_elements = 0; for (int64_t h = hstart; h < hend; h += dilation_h) { @@ -461,9 +462,11 @@ struct AveragePool2DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); const int64_t pool_index = ph * pooled_width + pw; y_d[pool_index] = 0; int total_elements = 0; @@ -532,12 +535,15 @@ struct AveragePool3DTask { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); for (int64_t pd = 0; pd < pooled_depth; ++pd) { int64_t dstart = pd * stride_d - pads[2]; int64_t dend = dstart + kernel_shape[2] * dilation_d; + dend = std::min(dend, depth + pads[5]); const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd; y_d[pool_index] = 0; int total_elements = 0; diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index b9b69fdc74..d44f098db6 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -961,6 +961,7 @@ std::unique_ptr> GetBrokenTests(const std::string& provider {"reduce_prod_empty_set", "unknown version", {}}, {"reduce_sum_empty_set", "unknown version", {}}, {"reduce_sum_square_empty_set_expanded", "unknown version", {}}, + {"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."}, #ifdef ENABLE_TRAINING_CORE {"adagrad", "not a registered function/op", {}}, // Op not registered. {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index a340f975ec..24a8c8491b 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) { kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); } +TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) { + // TODO: Unskip when fixed #41968513 + if (DefaultDmlExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect."; + } + + OpTester test("AveragePool", 19); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{3}); + test.AddAttribute("pads", vector{3, 3}); + test.AddAttribute("kernel_shape", vector{7}); + test.AddAttribute("ceil_mode", (int64_t)1); + test.AddAttribute("count_include_pad", (int64_t)1); + + std::vector x_vals = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f}; + std::vector x_dims = {1, 2, 9}; + std::vector expected_dims = {1, 2, 4}; + std::vector expected_vals = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); +} + TEST(PoolTest, GlobalAveragePool) { OpTester test("GlobalAveragePool");