Fix MaxPool when using dilation > 1 plus non-zero padding (#1320)

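MaxPool with dilation > 1 and non-zero padding did not compute the correct start index: the window start was clamped to 0, which shifted the dilated sampling positions whenever the window began inside the padding. The start index is now left unclamped and every sampled position is bounds-checked against the input extent instead. Test cases covering 1-D, 2-D, and 3-D dilation with padding were added.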
This commit is contained in:
Tracy Sharpe 2019-07-17 17:33:29 -07:00 committed by GitHub
parent fbdd905440
commit f47f6fd020
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 147 additions and 61 deletions

View file

@ -3,7 +3,7 @@
#include "core/framework/op_kernel_context_internal.h"
#include "core/providers/cpu/nn/pool.h"
#include <cmath>
using namespace ::onnxruntime::common;
namespace onnxruntime {
@ -25,7 +25,7 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
}
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
Tensor* Y = context->Output(0, TensorShape(output_dims));
Tensor* Y = context->Output(0, output_dims);
const auto* X_data = X->template Data<float>();
auto* Y_data = Y->template MutableData<float>();
@ -185,7 +185,7 @@ Status PoolBase::Compute(OpKernelContext* context, MLAS_POOLING_KIND kind) const
std::vector<int64_t> pads = pads_;
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
Tensor* Y = context->Output(0, TensorShape(output_dims));
Tensor* Y = context->Output(0, output_dims);
// Get access to the internal threadpool
// Temporarily derive concurrency parameters without access to session state
@ -222,8 +222,9 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
// and also if dilation is not required
bool need_dilation = false;
for (auto n : dilations_)
for (auto n : dilations_) {
need_dilation |= n > 1;
}
if (OpKernel::Node().OutputDefs().size() == 1 && !need_dilation) {
return PoolBase::Compute(context, MlasMaximumPooling);
@ -238,8 +239,8 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
std::vector<int64_t> kernel_shape = kernel_shape_;
std::vector<int64_t> output_dims = PoolBase::SetOutputSize(x_shape, x_shape[1], &pads, dilations_, ceil_mode_);
Tensor* Y = context->Output(0, TensorShape(output_dims));
Tensor* I = context->Output(1, TensorShape(output_dims));
Tensor* Y = context->Output(0, output_dims);
Tensor* I = context->Output(1, output_dims);
const auto* X_data = X->template Data<float>();
auto* Y_data = Y->template MutableData<float>();
@ -270,14 +271,15 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
int64_t* i_d = I_data ? I_data + c * y_step : nullptr;
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h() - pads[0];
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
hstart = std::max(hstart, static_cast<int64_t>(0));
int64_t hend = hstart + kernel_shape[0] * dilation_h;
float Yh = std::numeric_limits<float>::lowest();
int64_t h_index = -1;
for (int64_t h = hstart; h < hend; h += dilation_h) {
if (x_d[h] > Yh) {
Yh = x_d[h];
h_index = h;
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
if (x_d[h] > Yh) {
Yh = x_d[h];
h_index = h;
}
}
}
y_d[ph] = Yh;
@ -305,23 +307,25 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h() - pads[0];
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
hstart = std::max(hstart, static_cast<int64_t>(0));
int64_t hend = hstart + kernel_shape[0] * dilation_h;
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w() - pads[1];
int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
wstart = std::max(wstart, static_cast<int64_t>(0));
int64_t wend = wstart + kernel_shape[1] * dilation_w;
const int64_t pool_index = ph * pooled_width + pw;
float Yh = std::numeric_limits<float>::lowest();
int64_t h_index = -1;
int64_t w_index = -1;
for (int64_t h = hstart; h < hend; h += dilation_h) {
for (int64_t w = wstart; w < wend; w += dilation_w) {
const int64_t input_index = h * width + w;
if (x_d[input_index] > Yh) {
Yh = x_d[input_index];
h_index = h;
w_index = w;
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
for (int64_t w = wstart; w < wend; w += dilation_w) {
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
const int64_t input_index = h * width + w;
if (x_d[input_index] > Yh) {
Yh = x_d[input_index];
h_index = h;
w_index = w;
}
}
}
}
}
@ -353,16 +357,13 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h() - pads[0];
int64_t hend = std::min(hstart + kernel_shape[0] * dilation_h - dilation_h + 1, height);
hstart = std::max(hstart, static_cast<int64_t>(0));
int64_t hend = hstart + kernel_shape[0] * dilation_h;
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w() - pads[1];
int64_t wend = std::min(wstart + kernel_shape[1] * dilation_w - dilation_w + 1, width);
wstart = std::max(wstart, static_cast<int64_t>(0));
int64_t wend = wstart + kernel_shape[1] * dilation_w;
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
int64_t dstart = pd * stride_d() - pads[2];
int64_t dend = std::min(dstart + kernel_shape[2] * dilation_d - dilation_d + 1, depth);
dstart = std::max(dstart, static_cast<int64_t>(0));
int64_t dend = dstart + kernel_shape[2] * dilation_d;
const int64_t pool_index =
ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
float Yh = std::numeric_limits<float>::lowest();
@ -370,14 +371,20 @@ Status Pool<float, MaxPool<8 /*VERSION*/>>::Compute(OpKernelContext* context) co
int64_t w_index = -1;
int64_t d_index = -1;
for (int64_t h = hstart; h < hend; h += dilation_h) {
for (int64_t w = wstart; w < wend; w += dilation_w) {
for (int64_t d = dstart; d < dend; d += dilation_d) {
const int64_t input_index = h * width * depth + w * depth + d;
if (x_d[input_index] > Yh) {
Yh = x_d[input_index];
h_index = h;
w_index = w;
d_index = d;
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
for (int64_t w = wstart; w < wend; w += dilation_w) {
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
for (int64_t d = dstart; d < dend; d += dilation_d) {
if (math::is_a_ge_zero_and_a_lt_b(d, depth)) {
const int64_t input_index = h * width * depth + w * depth + d;
if (x_d[input_index] > Yh) {
Yh = x_d[input_index];
h_index = h;
w_index = w;
d_index = d;
}
}
}
}
}
}

View file

@ -7,6 +7,7 @@
#include "core/common/common.h"
#include "core/framework/op_kernel.h"
#include "core/providers/cpu/nn/autopad_type.h"
#include "core/util/math.h"
#include "core/mlas/inc/mlas.h"
namespace onnxruntime {

View file

@ -51,7 +51,7 @@ TEST(PoolTest, MaxPool) {
test.AddInput<float>("X", x_dims, x_vals);
test.AddOutput<float>("Y", expected_dims, expected_vals);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
}
// Only CUDA kernel has float 16 support
@ -104,11 +104,11 @@ TEST(PoolTest, MaxPool_F16) {
test.AddInput<MLFloat16>("X", x_dims, f_X);
test.AddOutput<MLFloat16>("Y", expected_dims, f_Y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Assertion `!attrs.count("pads")' failed
}
#endif
static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order = 0) {
OpTester test("MaxPool", 8);
test.AddAttribute("auto_pad", "");
@ -160,7 +160,7 @@ static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order=0) {
}
TEST(PoolTest, MaxPool_8_With_Index) {
MaxPool_8_WithIndexTest(false); // row major
MaxPool_8_WithIndexTest(false); // row major
MaxPool_8_WithIndexTest(true, 0 /*storage_order*/); // row major
MaxPool_8_WithIndexTest(true, 1 /*storage_order*/); // col major
}
@ -229,6 +229,26 @@ TEST(PoolTest, MaxPool_10_Dilation_1d) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// 1-D MaxPool (opset 10) with dilation 3 and one element of padding on each
// side. The window samples positions {start, start + 3, start + 6}; the first
// and last windows reach into the padding, so only their in-range positions
// may contribute to the maximum.
TEST(PoolTest, MaxPool_10_DilationPadding_1d) {
  OpTester tester("MaxPool", 10);

  tester.AddAttribute("auto_pad", "");
  tester.AddAttribute("strides", std::vector<int64_t>{1});
  tester.AddAttribute("pads", std::vector<int64_t>{1, 1});
  tester.AddAttribute("kernel_shape", std::vector<int64_t>{3});
  tester.AddAttribute("dilations", std::vector<int64_t>{3});

  const std::vector<int64_t> input_shape = {1, 1, 12};
  const std::vector<float> input_values = {
      1, 3, 2, 4, -1, -3, -2, -4, -6, -5, -4, -2};

  // Effective window extent is (3 - 1) * 3 + 1 = 7, so a padded length of 14
  // yields 14 - 7 + 1 = 8 outputs at stride 1.
  const std::vector<int64_t> output_shape = {1, 1, 8};
  const std::vector<float> output_values = {2, 4, 3, 2, 4, -1, -2, -2};

  tester.AddInput<float>("X", input_shape, input_values);
  tester.AddOutput<float>("Y", output_shape, output_values);

  // CUDA and TensorRT are listed in the excluded-providers argument.
  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
}
TEST(PoolTest, MaxPool_10_Dilation_2d) {
OpTester test("MaxPool", 10);
@ -239,11 +259,10 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
test.AddAttribute("dilations", vector<int64_t>{2, 2});
std::vector<float> x_vals = {
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4,
};
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4};
std::vector<int64_t> x_dims = {1, 1, 4, 5};
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
std::vector<float> expected_vals = {10, 12, 10, 14, 16, 14};
@ -253,6 +272,33 @@ TEST(PoolTest, MaxPool_10_Dilation_2d) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// 2-D MaxPool (opset 10) with a 2x2 kernel, dilation 2, and one element of
// padding on every edge. Each window samples rows {r, r + 2} and columns
// {c, c + 2}, starting at r = c = -1, so border windows partly fall into the
// padding and must ignore those positions.
TEST(PoolTest, MaxPool_10_DilationPadding_2d) {
  OpTester tester("MaxPool", 10);

  tester.AddAttribute("auto_pad", "");
  tester.AddAttribute("strides", std::vector<int64_t>{1, 1});
  tester.AddAttribute("pads", std::vector<int64_t>{1, 1, 1, 1});
  tester.AddAttribute("kernel_shape", std::vector<int64_t>{2, 2});
  tester.AddAttribute("dilations", std::vector<int64_t>{2, 2});

  const std::vector<int64_t> input_shape = {1, 1, 4, 5};
  const std::vector<float> input_values = {
      1, 3, 2, 4, -1,
      5, 7, 6, 8, -2,
      9, 11, 10, 12, -3,
      13, 15, 14, 16, -4};

  // Effective window extent is 3 in each dimension; with one element of
  // padding per side the output keeps the 4x5 input shape.
  const std::vector<int64_t> output_shape = {1, 1, 4, 5};
  const std::vector<float> output_values = {
      7, 6, 8, 6, 8,
      11, 10, 12, 10, 12,
      15, 14, 16, 14, 16,
      11, 10, 12, 10, 12};

  tester.AddInput<float>("X", input_shape, input_values);
  tester.AddOutput<float>("Y", output_shape, output_values);

  // CUDA and TensorRT are listed in the excluded-providers argument.
  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
}
TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
OpTester test("MaxPool", 10);
@ -263,11 +309,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil0_2d) {
test.AddAttribute("dilations", vector<int64_t>{2, 2});
std::vector<float> x_vals = {
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4,
};
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4};
std::vector<int64_t> x_dims = {1, 1, 4, 5};
std::vector<int64_t> expected_dims = {1, 1, 1, 3};
std::vector<float> expected_vals = {10, 12, 10};
@ -288,11 +333,10 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
test.AddAttribute("ceil_mode", (int64_t)1);
std::vector<float> x_vals = {
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4,
};
1, 3, 2, 4, -1,
5, 7, 6, 8, -2,
9, 11, 10, 12, -3,
13, 15, 14, 16, -4};
std::vector<int64_t> x_dims = {1, 1, 4, 5};
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
std::vector<float> expected_vals = {10, 12, 10, 10, 12, 10};
@ -302,6 +346,41 @@ TEST(PoolTest, MaxPool_10_Dilation_Ceil1_2d) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// 3-D MaxPool (opset 10) with a 2x2x2 kernel, dilation 2, and one element of
// padding on every face. The input is two identical 4x5 slices. Along the
// first spatial axis each window samples {-1, 1} or {0, 2}, so exactly one
// slice is in range per output slice and both output slices match the 2-D
// dilation-plus-padding result.
TEST(PoolTest, MaxPool_10_DilationPadding_3d) {
  OpTester tester("MaxPool", 10);

  tester.AddAttribute("auto_pad", "");
  tester.AddAttribute("strides", std::vector<int64_t>{1, 1, 1});
  tester.AddAttribute("pads", std::vector<int64_t>{1, 1, 1, 1, 1, 1});
  tester.AddAttribute("kernel_shape", std::vector<int64_t>{2, 2, 2});
  tester.AddAttribute("dilations", std::vector<int64_t>{2, 2, 2});

  const std::vector<int64_t> input_shape = {1, 1, 2, 4, 5};
  const std::vector<float> input_values = {
      // slice 0
      1, 3, 2, 4, -1,
      5, 7, 6, 8, -2,
      9, 11, 10, 12, -3,
      13, 15, 14, 16, -4,
      // slice 1
      1, 3, 2, 4, -1,
      5, 7, 6, 8, -2,
      9, 11, 10, 12, -3,
      13, 15, 14, 16, -4};

  // Effective window extent is 3 in each dimension; with one element of
  // padding per side the output keeps the 2x4x5 input shape.
  const std::vector<int64_t> output_shape = {1, 1, 2, 4, 5};
  const std::vector<float> output_values = {
      // slice 0
      7, 6, 8, 6, 8,
      11, 10, 12, 10, 12,
      15, 14, 16, 14, 16,
      11, 10, 12, 10, 12,
      // slice 1
      7, 6, 8, 6, 8,
      11, 10, 12, 10, 12,
      15, 14, 16, 14, 16,
      11, 10, 12, 10, 12};

  tester.AddInput<float>("X", input_shape, input_values);
  tester.AddOutput<float>("Y", output_shape, output_values);

  // CUDA and TensorRT are listed in the excluded-providers argument.
  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
}
TEST(PoolTest, GlobalMaxPool) {
OpTester test("GlobalMaxPool");
@ -566,17 +645,16 @@ TEST(PoolTest, AveragePool_10_ceil1_2d) {
test.AddAttribute("strides", std::vector<int64_t>{3, 1});
test.AddAttribute("pads", vector<int64_t>{0, 0, 0, 0});
test.AddAttribute("kernel_shape", vector<int64_t>{2, 2});
test.AddAttribute("ceil_mode", (int64_t) 1);
test.AddAttribute("ceil_mode", (int64_t)1);
std::vector<float> x_vals = {
1, 3, 2, 4,
5, 7, 6, 8,
9, 11, 10, 12,
13, 15, 14, 16,
};
1, 3, 2, 4,
5, 7, 6, 8,
9, 11, 10, 12,
13, 15, 14, 16};
std::vector<int64_t> x_dims = {1, 1, 4, 4};
std::vector<int64_t> expected_dims = {1, 1, 2, 3};
std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f , 14.0f, 14.5f, 15.0f};
std::vector<float> expected_vals = {4.0f, 4.5f, 5.0f, 14.0f, 14.5f, 15.0f};
test.AddInput<float>("X", x_dims, x_vals);
test.AddOutput<float>("Y", expected_dims, expected_vals);