mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-02 03:55:34 +00:00
Fix MaxPool when using dilation > 1 plus non-zero padding (#1320)
MaxPool with dilation > 1 and non-zero padding did not compute the correct start index of the pooling window. This commit fixes the index computation and adds test cases that cover the dilation-plus-padding case.
This commit is contained in:
parent
fbdd905440
commit
f47f6fd020
3 changed files with 147 additions and 61 deletions
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "core/framework/op_kernel_context_internal.h"
|
||||
#include "core/providers/cpu/nn/pool.h"
|
||||
#include <cmath>
|
||||
|
||||
using namespace ::onnxruntime::common;
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -25,7 +25,7 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
|
|||
}
|
||||
|
||||
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
|
||||
Tensor* Y = context->Output(0, TensorShape(output_dims));
|
||||
Tensor* Y = context->Output(0, output_dims);
|
||||
|
||||
const auto* X_data = X->template Data<float>();
|
||||
auto* Y_data = Y->template MutableData<float>();
|
||||
|
|
@ -185,7 +185,7 @@ Status PoolBase::Compute(OpKernelContext* context, MLAS_POOLING_KIND kind) const
|
|||
|
||||
std::vector<int64_t> pads = pads_;
|
||||
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
|
||||
Tensor* Y = context->Output(0, TensorShape(output_dims));
|
||||
Tensor* Y = context->Output(0, output_dims);
|
||||
|
||||
// Get access to the internal threadpool
|
||||
// Temporarily derive concurrency parameters without access to session state
|
||||
|
|
@ -222,8 +222,9 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
// and also if dilation is not required
|
||||
|
||||
bool need_dilation = false;
|
||||
for (auto n : dilations_)
|
||||
for (auto n : dilations_) {
|
||||
need_dilation |= n > 1;
|
||||
}
|
||||
|
||||
if (OpKernel::Node().OutputDefs().size() == 1 && !need_dilation) {
|
||||
return PoolBase::Compute(context, MlasMaximumPooling);
|
||||
|
|
@ -238,8 +239,8 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
std::vector<int64_t> kernel_shape = kernel_shape_;
|
||||
|
||||
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
|
||||
Tensor* Y = context->Output(0, TensorShape(output_dims));
|
||||
Tensor* I = context->Output(1, TensorShape(output_dims));
|
||||
Tensor* Y = context->Output(0, output_dims);
|
||||
Tensor* I = context->Output(1, output_dims);
|
||||
|
||||
const auto* X_data = X->template Data<float>();
|
||||
auto* Y_data = Y->template MutableData<float>();
|
||||
|
|
@ -270,14 +271,15 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
int64_t* i_d = I_data ? I_data + c * y_step : nullptr;
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h() - pads[0];
|
||||
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
|
||||
hstart = std::max(hstart, static_cast<int64_t>(0));
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
float Yh = std::numeric_limits<float>::lowest();
|
||||
int64_t h_index = -1;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
if (x_d[h] > Yh) {
|
||||
Yh = x_d[h];
|
||||
h_index = h;
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
if (x_d[h] > Yh) {
|
||||
Yh = x_d[h];
|
||||
h_index = h;
|
||||
}
|
||||
}
|
||||
}
|
||||
y_d[ph] = Yh;
|
||||
|
|
@ -305,23 +307,25 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h() - pads[0];
|
||||
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
|
||||
hstart = std::max(hstart, static_cast<int64_t>(0));
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
for (int64_t pw = 0; pw < pooled_width; ++pw) {
|
||||
int64_t wstart = pw * stride_w() - pads[1];
|
||||
int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
|
||||
wstart = std::max(wstart, static_cast<int64_t>(0));
|
||||
int64_t wend = wstart + kernel_shape[1] * dilation_w;
|
||||
const int64_t pool_index = ph * pooled_width + pw;
|
||||
float Yh = std::numeric_limits<float>::lowest();
|
||||
int64_t h_index = -1;
|
||||
int64_t w_index = -1;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
const int64_t input_index = h * width + w;
|
||||
if (x_d[input_index] > Yh) {
|
||||
Yh = x_d[input_index];
|
||||
h_index = h;
|
||||
w_index = w;
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
|
||||
const int64_t input_index = h * width + w;
|
||||
if (x_d[input_index] > Yh) {
|
||||
Yh = x_d[input_index];
|
||||
h_index = h;
|
||||
w_index = w;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -353,16 +357,13 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h() - pads[0];
|
||||
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
|
||||
hstart = std::max(hstart, static_cast<int64_t>(0));
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
for (int64_t pw = 0; pw < pooled_width; ++pw) {
|
||||
int64_t wstart = pw * stride_w() - pads[1];
|
||||
int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
|
||||
wstart = std::max(wstart, static_cast<int64_t>(0));
|
||||
int64_t wend = wstart + kernel_shape[1] * dilation_w;
|
||||
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
|
||||
int64_t dstart = pd * stride_d() - pads[2];
|
||||
int64_t dend = std::min(dstart + kernel_shape[2] * dilation_d - dilation_d + 1, depth);
|
||||
dstart = std::max(dstart, static_cast<int64_t>(0));
|
||||
int64_t dend = dstart + kernel_shape[2] * dilation_d;
|
||||
const int64_t pool_index =
|
||||
ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
|
||||
float Yh = std::numeric_limits<float>::lowest();
|
||||
|
|
@ -370,14 +371,20 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
|
|||
int64_t w_index = -1;
|
||||
int64_t d_index = -1;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
for (int64_t d = dstart; d < dend; d += dilation_d) {
|
||||
const int64_t input_index = h * width * depth + w * depth + d;
|
||||
if (x_d[input_index] > Yh) {
|
||||
Yh = x_d[input_index];
|
||||
h_index = h;
|
||||
w_index = w;
|
||||
d_index = d;
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
|
||||
for (int64_t d = dstart; d < dend; d += dilation_d) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(d, depth)) {
|
||||
const int64_t input_index = h * width * depth + w * depth + d;
|
||||
if (x_d[input_index] > Yh) {
|
||||
Yh = x_d[input_index];
|
||||
h_index = h;
|
||||
w_index = w;
|
||||
d_index = d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "core/common/common.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/cpu/nn/autopad_type.h"
|
||||
#include "core/util/math.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ TEST(PoolTest, MaxPool) {
|
|||
|
||||
test.AddInput<float>("X", x_dims, x_vals);
|
||||
test.AddOutput<float>("Y", expected_dims, expected_vals);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
|
||||
}
|
||||
|
||||
// Only CUDA kernel has float 16 support
|
||||
|
|
@ -104,11 +104,11 @@ TEST(PoolTest, MaxPool_F16) {
|
|||
|
||||
test.AddInput<MLFloat16>("X", x_dims, f_X);
|
||||
test.AddOutput<MLFloat16>("Y", expected_dims, f_Y);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed
|
||||
}
|
||||
#endif
|
||||
|
||||
static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
|
||||
static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order = 0) {
|
||||
OpTester test("MaxPool", 8);
|
||||
|
||||
test.AddAttribute("auto_pad", "");
|
||||
|
|
@ -160,7 +160,7 @@ static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
|
|||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_8_With_Index) {
|
||||
MaxPool_8_WithIndexTest(false); // row major
|
||||
MaxPool_8_WithIndexTest(false); // row major
|
||||
MaxPool_8_WithIndexTest(true, 0 /*storage_order*/); // row major
|
||||
MaxPool_8_WithIndexTest(true, 1 /*storage_order*/); // col major
|
||||
}
|
||||
|
|
@ -229,6 +229,26 @@ TEST(PoolTest, MaxPool_10_Dilation_1d) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_10_DilationPadding_1d) {
|
||||
OpTester test("MaxPool", 10);
|
||||
|
||||
test.AddAttribute("auto_pad", "");
|
||||
test.AddAttribute("strides", std::vector<int64_t>{1});
|
||||
test.AddAttribute("pads", vector<int64_t>{1, 1});
|
||||
test.AddAttribute("kernel_shape", vector<int64_t>{3});
|
||||
test.AddAttribute("dilations", vector<int64_t>{3});
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1, -3, -2, -4, -6, -5, -4, -2};
|
||||
std::vector<int64_t> x_dims = {1, 1, 12};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 8};
|
||||
std::vector<float> expected_vals = {2, 4, 3, 2, 4, -1, -2, -2};
|
||||
|
||||
test.AddInput<float>("X", x_dims, x_vals);
|
||||
test.AddOutput<float>("Y", expected_dims, expected_vals);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_10_Dilation_2d) {
|
||||
OpTester test("MaxPool", 10);
|
||||
|
||||
|
|
@ -239,11 +259,10 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
|
|||
test.AddAttribute("dilations", vector<int64_t>{2, 2});
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4,
|
||||
};
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4};
|
||||
std::vector<int64_t> x_dims = {1, 1, 4, 5};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
|
||||
std::vector<float> expected_vals = {10, 12, 10, 14, 16, 14};
|
||||
|
|
@ -253,6 +272,33 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_10_DilationPadding_2d) {
|
||||
OpTester test("MaxPool", 10);
|
||||
|
||||
test.AddAttribute("auto_pad", "");
|
||||
test.AddAttribute("strides", std::vector<int64_t>{1, 1});
|
||||
test.AddAttribute("pads", vector<int64_t>{1, 1, 1, 1});
|
||||
test.AddAttribute("kernel_shape", vector<int64_t>{2, 2});
|
||||
test.AddAttribute("dilations", vector<int64_t>{2, 2});
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4};
|
||||
std::vector<int64_t> x_dims = {1, 1, 4, 5};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 4, 5};
|
||||
std::vector<float> expected_vals = {
|
||||
7, 6, 8, 6, 8,
|
||||
11, 10, 12, 10, 12,
|
||||
15, 14, 16, 14, 16,
|
||||
11, 10, 12, 10, 12};
|
||||
|
||||
test.AddInput<float>("X", x_dims, x_vals);
|
||||
test.AddOutput<float>("Y", expected_dims, expected_vals);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
|
||||
OpTester test("MaxPool", 10);
|
||||
|
||||
|
|
@ -263,11 +309,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
|
|||
test.AddAttribute("dilations", vector<int64_t>{2, 2});
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4,
|
||||
};
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4};
|
||||
std::vector<int64_t> x_dims = {1, 1, 4, 5};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 1, 3};
|
||||
std::vector<float> expected_vals = {10, 12, 10};
|
||||
|
|
@ -288,11 +333,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
|
|||
test.AddAttribute("ceil_mode", (int64_t)1);
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4,
|
||||
};
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4};
|
||||
std::vector<int64_t> x_dims = {1, 1, 4, 5};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
|
||||
std::vector<float> expected_vals = {10, 12, 10, 10, 12, 10};
|
||||
|
|
@ -302,6 +346,41 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_10_DilationPadding_3d) {
|
||||
OpTester test("MaxPool", 10);
|
||||
|
||||
test.AddAttribute("auto_pad", "");
|
||||
test.AddAttribute("strides", std::vector<int64_t>{1, 1, 1});
|
||||
test.AddAttribute("pads", vector<int64_t>{1, 1, 1, 1, 1, 1});
|
||||
test.AddAttribute("kernel_shape", vector<int64_t>{2, 2, 2});
|
||||
test.AddAttribute("dilations", vector<int64_t>{2, 2, 2});
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4,
|
||||
1, 3, 2, 4, -1,
|
||||
5, 7, 6, 8, -2,
|
||||
9, 11, 10, 12, -3,
|
||||
13, 15, 14, 16, -4};
|
||||
std::vector<int64_t> x_dims = {1, 1, 2, 4, 5};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 2, 4, 5};
|
||||
std::vector<float> expected_vals = {
|
||||
7, 6, 8, 6, 8,
|
||||
11, 10, 12, 10, 12,
|
||||
15, 14, 16, 14, 16,
|
||||
11, 10, 12, 10, 12,
|
||||
7, 6, 8, 6, 8,
|
||||
11, 10, 12, 10, 12,
|
||||
15, 14, 16, 14, 16,
|
||||
11, 10, 12, 10, 12};
|
||||
|
||||
test.AddInput<float>("X", x_dims, x_vals);
|
||||
test.AddOutput<float>("Y", expected_dims, expected_vals);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, GlobalMaxPool) {
|
||||
OpTester test("GlobalMaxPool");
|
||||
|
||||
|
|
@ -566,17 +645,16 @@ TEST(PoolTest, AveragePool_10_ceil1_2d) {
|
|||
test.AddAttribute("strides", std::vector<int64_t>{3, 1});
|
||||
test.AddAttribute("pads", vector<int64_t>{0, 0, 0, 0});
|
||||
test.AddAttribute("kernel_shape", vector<int64_t>{2, 2});
|
||||
test.AddAttribute("ceil_mode", (int64_t) 1);
|
||||
test.AddAttribute("ceil_mode", (int64_t)1);
|
||||
|
||||
std::vector<float> x_vals = {
|
||||
1, 3, 2, 4,
|
||||
5, 7, 6, 8,
|
||||
9, 11, 10, 12,
|
||||
13, 15, 14, 16,
|
||||
};
|
||||
1, 3, 2, 4,
|
||||
5, 7, 6, 8,
|
||||
9, 11, 10, 12,
|
||||
13, 15, 14, 16};
|
||||
std::vector<int64_t> x_dims = {1, 1, 4, 4};
|
||||
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
|
||||
std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f , 14.0f, 14.5f, 15.0f};
|
||||
std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f, 14.0f, 14.5f, 15.0f};
|
||||
|
||||
test.AddInput<float>("X", x_dims, x_vals);
|
||||
test.AddOutput<float>("Y", expected_dims, expected_vals);
|
||||
|
|
|
|||
Loading…
Reference in a new issue