Align AvgPool ceil_mode on last value to torch (#16752)

Fix #16203

Prior to this PR, when `ceil_mode` was on, the calculation of a value
would divide by the kernel size even if the number of remaining pixels
was less than the kernel size, which caused this operator's results to
differ between ORT and torch.

However, this fix only applies to the change in #15597, which only
supports AvgPool from opset 19 onward. Older opset versions remain the
same, as they use the MLAS files.

Also, this PR fixes the shape mismatch caused by a sliding window
starting in the padding. More details: https://github.com/onnx/onnx/pull/6650
(this PR is also validated with the tests added in
https://github.com/onnx/onnx/pull/6650).
This commit is contained in:
Ti-Tai Wang 2025-01-23 17:35:11 -08:00 committed by GitHub
parent 06fc73b7d4
commit 8b1d3b3d57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 49 additions and 9 deletions

View file

@ -150,14 +150,14 @@ struct PoolAttributes {
case AutoPadType::VALID:
*pad_head = 0;
*pad_tail = 0;
*out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation);
*out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation);
break;
case AutoPadType::SAME_LOWER: {
int64_t legacy_target_size = (in_size + stride - 1) / stride;
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
*pad_head = (pad_needed + 1) / 2;
*pad_tail = pad_needed - *pad_head;
*out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
break;
}
case AutoPadType::SAME_UPPER: {
@ -165,7 +165,7 @@ struct PoolAttributes {
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
*pad_head = pad_needed / 2;
*pad_tail = pad_needed - *pad_head;
*out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
break;
}
default: {
@ -173,7 +173,7 @@ struct PoolAttributes {
}
}
} else {
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation);
*out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
}
}
#if defined(_MSC_VER) && !defined(__clang__)
@ -184,13 +184,21 @@ struct PoolAttributes {
// Computes the pooled output length along one spatial axis from the input
// length, stride, kernel size, padding and dilation, taking `ceil_mode` into
// account.
// NOTE(review): this span looks like a rendered diff in which removed and added
// lines are interleaved (both a `pad_needed` parameter and `pad_head`/`pad_tail`
// parameters, and several `return` statements) — confirm against the actual
// header before relying on the exact statement order shown here.
int64_t ComputeOutputSize(int64_t in_size,
int64_t stride,
int64_t kernel,
int64_t pad_needed,
int64_t pad_head,
int64_t pad_tail,
int64_t dilation) const {
if (ceil_mode == 0) {
return static_cast<int64_t>(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1);
// Padded input extent minus the span covered by one dilated kernel.
int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1;
// Default result uses truncating integer division.
int64_t out_size = numerator / stride + 1;
if (ceil_mode == 1) {
// Ceil mode: round the quotient up so a trailing partial window is counted.
// NOTE(review): the quotient is computed in `float`, which represents
// integers exactly only up to 2^24 — very large extents may round
// incorrectly; consider integer ceil division.
out_size = static_cast<int64_t>(std::ceil(static_cast<float>(numerator) / stride)) + 1;
// Ensure that the last pooling starts inside the image (at least 1 pixel)
// Reference: https://github.com/onnx/onnx/pull/5741
if ((out_size - 1) * stride >= in_size + pad_head) {
--out_size;
}
}
return static_cast<int64_t>(
std::ceil(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1));
return out_size;
}
#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(pop)

View file

@ -406,6 +406,7 @@ struct AveragePool1DTask final {
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h - pads[0];
int64_t hend = hstart + kernel_shape[0] * dilation_h;
hend = std::min(hend, height + pads[1]);
y_d[ph] = 0;
int total_elements = 0;
for (int64_t h = hstart; h < hend; h += dilation_h) {
@ -461,9 +462,11 @@ struct AveragePool2DTask final {
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h - pads[0];
int64_t hend = hstart + kernel_shape[0] * dilation_h;
hend = std::min(hend, height + pads[1]);
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w - pads[1];
int64_t wend = wstart + kernel_shape[1] * dilation_w;
wend = std::min(wend, width + pads[3]);
const int64_t pool_index = ph * pooled_width + pw;
y_d[pool_index] = 0;
int total_elements = 0;
@ -532,12 +535,15 @@ struct AveragePool3DTask {
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h - pads[0];
int64_t hend = hstart + kernel_shape[0] * dilation_h;
hend = std::min(hend, height + pads[1]);
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w - pads[1];
int64_t wend = wstart + kernel_shape[1] * dilation_w;
wend = std::min(wend, width + pads[3]);
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
int64_t dstart = pd * stride_d - pads[2];
int64_t dend = dstart + kernel_shape[2] * dilation_d;
dend = std::min(dend, depth + pads[5]);
const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
y_d[pool_index] = 0;
int total_elements = 0;

View file

@ -961,6 +961,7 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
{"reduce_prod_empty_set", "unknown version", {}},
{"reduce_sum_empty_set", "unknown version", {}},
{"reduce_sum_square_empty_set_expanded", "unknown version", {}},
{"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."},
#ifdef ENABLE_TRAINING_CORE
{"adagrad", "not a registered function/op", {}}, // Op not registered.
{"adagrad_multiple", "not a registered function/op", {}}, // Op not registered.

View file

@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) {
kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
}
// Opset-19 AveragePool on a 1-D input with ceil_mode = 1 and
// count_include_pad = 1. With length 9, pads {3, 3}, kernel 7 and stride 3 the
// ceil-rounded output length is 4, so the expected output shape is {1, 2, 4}.
TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) {
  // TODO: Unskip when fixed #41968513
  if (DefaultDmlExecutionProvider().get() != nullptr) {
    GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect.";
  }

  // Input tensor: batch 1, 2 channels, 9 elements per channel.
  std::vector<int64_t> input_shape = {1, 2, 9};
  std::vector<float> input_data = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f};

  // Reference output: 4 pooled values per channel.
  std::vector<int64_t> output_shape = {1, 2, 4};
  std::vector<float> output_data = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f};

  OpTester test("AveragePool", 19);
  test.AddAttribute("auto_pad", "");
  test.AddAttribute("strides", std::vector<int64_t>{3});
  test.AddAttribute("pads", std::vector<int64_t>{3, 3});
  test.AddAttribute("kernel_shape", std::vector<int64_t>{7});
  test.AddAttribute("ceil_mode", static_cast<int64_t>(1));
  test.AddAttribute("count_include_pad", static_cast<int64_t>(1));

  test.AddInput<float>("X", input_shape, input_data);
  test.AddOutput<float>("Y", output_shape, output_data);

  // These execution providers are excluded from the run.
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
}
TEST(PoolTest, GlobalAveragePool) {
OpTester test("GlobalAveragePool");