Implement QLinearAveragePool with unit tests. (#6896)

Implement QLinearAveragePool with unit tests.
This commit is contained in:
Zhang Lei 2021-03-10 10:02:01 -08:00 committed by GitHub
parent a8b897f710
commit acfe7ac4ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 605 additions and 1 deletions

View file

@ -47,6 +47,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDoma
// ******** Start: Quantization ******************* //
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulInteger16);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearGlobalAveragePool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearAveragePool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear);
@ -131,6 +132,7 @@ Status RegisterQuantizationKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<void>, //default entry to avoid the list become empty after ops-reducing
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulInteger16)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearGlobalAveragePool)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearAveragePool)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear)>,

View file

@ -0,0 +1,327 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "qlinear_pool.h"
#include "core/util/math_cpuonly.h"
#include "core/providers/common.h"
#include "core/platform/threadpool.h"
#include "core/util/math.h"
#include "core/mlas/inc/mlas.h"
#include <functional>
namespace onnxruntime {
using concurrency::ThreadPool;
namespace contrib {
// Converts a quantized 8-bit value to float: x_scale * (x - x_zero_point).
// Only the specializations defined below are available.
template <typename T8Bits>
static inline float dequantize_value(T8Bits x, float x_scale, T8Bits x_zero_point);

// Converts a float to its quantized 8-bit representation:
// round(y / y_scale + y_zero_point), saturated to the range of T8Bits.
// Only the specializations defined below are available.
template <typename T8Bits>
static inline T8Bits quantize_value(float y, float y_scale, T8Bits y_zero_point);

// uint8_t dequantization. The subtraction is carried out in int so that a
// value below the zero point yields a negative result instead of wrapping.
template <>
inline float dequantize_value<uint8_t>(uint8_t x, float x_scale, uint8_t x_zero_point) {
  const int centered = static_cast<int>(x) - x_zero_point;
  return x_scale * centered;
}

// uint8_t quantization. Rounds with std::nearbyintf (current rounding mode)
// and then saturates the result to [0, 255].
template <>
inline uint8_t quantize_value<uint8_t>(float y, float y_scale, uint8_t y_zero_point) {
  const float rounded = std::nearbyintf(y / y_scale + y_zero_point);
  const float capped_high = std::min(rounded, 255.0f);
  return static_cast<uint8_t>(std::max(0.0f, capped_high));
}
// Work item for 1-D quantized pooling. Each unit of work is one (batch, channel)
// slice: it reads already-dequantized floats from X_data, pools them with
// PoolType and writes re-quantized values to Y_data.
//
// The struct is brace-initialized positionally by its user, so the member
// order below must not change.
template <typename T8Bits, typename PoolType>
struct QLinearPool1DTask final {
  const float* X_data;   // dequantized input, x_step floats per slice
  T8Bits* Y_data;        // quantized output, y_step values per slice
  float y_scale;
  T8Bits y_zero_point;
  int64_t x_step;
  int64_t y_step;
  int64_t pooled_height;
  int64_t stride_h;
  int64_t height;
  const std::vector<int64_t>& kernel_shape;
  const std::vector<int64_t>& pads;
  const PoolProcessContext& pool_context_;
  const PoolAttributes& pool_attrs_;

  // Per-slice cost estimate handed to the thread pool: one unit per
  // (output element, kernel element) pair.
  TensorOpCost Cost() {
    const double work_per_slice = static_cast<double>(pooled_height * kernel_shape[0]);
    return TensorOpCost{work_per_slice, work_per_slice, work_per_slice};
  }

  // Processes the slices in the half-open range [begin, end).
  void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
    for (std::ptrdiff_t slice = begin; slice < end; ++slice) {
      (*this)(slice);
    }
  }

  // Pools a single slice `c`.
  void operator()(std::ptrdiff_t c) const {
    const float* slice_in = X_data + c * x_step;
    T8Bits* slice_out = Y_data + c * y_step;

    for (int64_t ph = 0; ph < pooled_height; ++ph) {
      // Window position in input coordinates; the unclamped start may be
      // negative when the window begins inside the leading padding.
      const int64_t window_origin = ph * stride_h - pads[0];
      const int64_t hend = std::min(window_origin + kernel_shape[0], height);
      const int64_t hstart = std::max(window_origin, static_cast<int64_t>(0));

      float acc = PoolType::Initialize();
      for (int64_t h = hstart; h < hend; ++h) {
        PoolType::Process(slice_in[h], acc, pool_context_);
      }

      // Element count passed to Finalize: the full kernel size when padding is
      // counted, otherwise only the elements actually read from the input.
      const int64_t pool_size = pool_attrs_.count_include_pad ? kernel_shape[0] : (hend - hstart);
      PoolType::Finalize(pool_size, acc, pool_context_);

      slice_out[ph] = quantize_value(acc, y_scale, y_zero_point);
    }
  }
};
template <typename T8Bits, typename PoolType>
struct QLinearPool2DTask final {
const float* X_data;
T8Bits* Y_data;
float y_scale;
T8Bits y_zero_point;
int64_t x_step;
int64_t y_step;
int64_t pooled_height;
int64_t pooled_width;
int64_t stride_h;
int64_t stride_w;
int64_t height;
int64_t width;
const std::vector<int64_t>& kernel_shape;
const std::vector<int64_t>& pads;
const PoolProcessContext& pool_context_;
const PoolAttributes& pool_attrs_;
TensorOpCost Cost() {
double loop_count = static_cast<double>(pooled_height * pooled_width * kernel_shape[0] * kernel_shape[1]);
return TensorOpCost{loop_count, loop_count, loop_count};
}
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
for (int64_t c = begin; c < end; ++c) {
operator()(c);
}
}
void operator()(std::ptrdiff_t c) const {
const float* x_d = X_data + c * x_step;
T8Bits* y_d = Y_data + c * y_step;
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h - pads[0];
int64_t hend = std::min(hstart + kernel_shape[0], height);
hstart = std::max(hstart, static_cast<int64_t>(0));
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w - pads[1];
int64_t wend = std::min(wstart + kernel_shape[1], width);
wstart = std::max(wstart, static_cast<int64_t>(0));
const int64_t pool_index = ph * pooled_width + pw;
float Yh = PoolType::Initialize();
for (int64_t h = hstart; h < hend; ++h) {
int64_t input_index = h * width + wstart;
for (int64_t w = wstart; w < wend; ++w) {
PoolType::Process(x_d[input_index++], Yh, pool_context_);
}
}
if (pool_attrs_.count_include_pad) {
PoolType::Finalize(kernel_shape[0] * kernel_shape[1], Yh, pool_context_);
} else {
PoolType::Finalize((hend - hstart) * (wend - wstart), Yh, pool_context_);
}
y_d[pool_index] = quantize_value(Yh, y_scale, y_zero_point);
}
}
}
};
template <typename T8Bits, typename PoolType>
struct QLinearPool3DTask final {
const float* X_data;
T8Bits* Y_data;
float y_scale;
T8Bits y_zero_point;
int64_t x_step;
int64_t y_step;
int64_t pooled_height;
int64_t pooled_width;
int64_t pooled_depth;
int64_t stride_h;
int64_t stride_w;
int64_t stride_d;
int64_t height;
int64_t width;
int64_t depth;
const std::vector<int64_t>& kernel_shape;
const std::vector<int64_t>& pads;
const PoolProcessContext& pool_context_;
const PoolAttributes& pool_attrs_;
TensorOpCost Cost() {
double loop_count = static_cast<double>(pooled_height * pooled_width * pooled_depth * kernel_shape[0] *
kernel_shape[1] * kernel_shape[2]);
return TensorOpCost{loop_count, loop_count, loop_count};
}
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
for (int64_t c = begin; c < end; ++c) {
operator()(c);
}
}
void operator()(std::ptrdiff_t c) const {
const float* x_d = X_data + c * x_step;
T8Bits* y_d = Y_data + c * y_step;
for (int64_t ph = 0; ph < pooled_height; ++ph) {
int64_t hstart = ph * stride_h - pads[0];
int64_t hend = std::min(hstart + kernel_shape[0], height);
hstart = std::max(hstart, static_cast<int64_t>(0));
for (int64_t pw = 0; pw < pooled_width; ++pw) {
int64_t wstart = pw * stride_w - pads[1];
int64_t wend = std::min(wstart + kernel_shape[1], width);
wstart = std::max(wstart, static_cast<int64_t>(0));
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
int64_t dstart = pd * stride_d - pads[2];
int64_t dend = std::min(dstart + kernel_shape[2], depth);
dstart = std::max(dstart, static_cast<int64_t>(0));
const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
float Yh = PoolType::Initialize();
for (int64_t h = hstart; h < hend; ++h) {
const int64_t input_index_h = h * width * depth;
for (int64_t w = wstart; w < wend; ++w) {
int64_t input_index = input_index_h + w * depth + dstart;
for (int64_t d = dstart; d < dend; ++d) {
PoolType::Process(x_d[input_index++], Yh, pool_context_);
}
}
}
if (pool_attrs_.count_include_pad) {
PoolType::Finalize(kernel_shape[0] * kernel_shape[1] * kernel_shape[2], Yh, pool_context_);
} else {
PoolType::Finalize((hend - hstart) * (wend - wstart) * (dend - dstart), Yh, pool_context_);
}
auto y_value = quantize_value(Yh, y_scale, y_zero_point);
y_d[pool_index] = y_value;
}
}
}
}
};
// Runs quantized average pooling over 1, 2 or 3 spatial dimensions.
//
// Inputs, by index: 0 = X (uint8), 1 = x_scale, 2 = x_zero_point (optional,
// treated as 0 when absent), 3 = y_scale, 4 = y_zero_point (optional, treated
// as 0 when absent). Scales and zero points must be scalars or one-element
// vectors. X is indexed as [batch, channel, spatial...].
//
// The whole input is first dequantized into a float scratch buffer; each
// (batch, channel) slice is then pooled in float and re-quantized into output 0.
//
// Returns INVALID_ARGUMENT when the kernel rank is not 1, 2 or 3, and a failed
// Status when X has fewer than 3 dimensions. Throws (ORT_ENFORCE / ORT_THROW)
// on malformed quantization parameters or a non-uint8 X.
Status QLinearAveragePool::Compute(OpKernelContext* context) const {
  const auto* tensor_x_scale = context->Input<Tensor>(1);
  const auto* tensor_x_zero_point = context->Input<Tensor>(2);
  const auto* tensor_y_scale = context->Input<Tensor>(3);
  const auto* tensor_y_zero_point = context->Input<Tensor>(4);

  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_x_scale),
              "Input x_scale must be a scalar or 1D tensor of size 1");
  ORT_ENFORCE(tensor_x_zero_point == nullptr || IsScalarOr1ElementVector(tensor_x_zero_point),
              "input x_zero_point must be a scalar or 1D tensor of size 1 if given");
  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_y_scale),
              "input y_scale must be a scalar or 1D tensor of size 1");
  ORT_ENFORCE(tensor_y_zero_point == nullptr || IsScalarOr1ElementVector(tensor_y_zero_point),
              "input y_zero_point must be a scalar or 1D tensor of size 1 if given");

  const auto* X = context->Input<Tensor>(0);
  const auto dtype = X->GetElementType();
  if (dtype != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
    ORT_THROW("Unsupported 'dtype' in QLinear Pooling:", dtype);
  }

  const TensorShape& x_shape = X->Shape();
  const float x_scale = *(tensor_x_scale->Data<float>());
  const float y_scale = *(tensor_y_scale->Data<float>());
  const uint8_t x_zero_point =
      tensor_x_zero_point ? *(tensor_x_zero_point->Data<uint8_t>()) : static_cast<uint8_t>(0);
  const uint8_t y_zero_point =
      tensor_y_zero_point ? *(tensor_y_zero_point->Data<uint8_t>()) : static_cast<uint8_t>(0);

  ORT_RETURN_IF_NOT(x_shape.NumDimensions() >= 3, "Input dimension cannot be less than 3.");

  // `pads` is a private copy because SetOutputSize receives it through a
  // non-const pointer. Strides and kernel shape are only read, so they are
  // bound by reference instead of being copied on every call.
  std::vector<int64_t> pads = pool_attrs_.pads;
  const std::vector<int64_t>& strides = pool_attrs_.strides;
  const std::vector<int64_t>& kernel_shape = pool_attrs_.kernel_shape;
  std::vector<int64_t> output_dims = pool_attrs_.SetOutputSize(x_shape, x_shape[1], &pads);
  Tensor* Y = context->Output(0, output_dims);

  // Reject unsupported kernel ranks before paying for the dequantization pass.
  const size_t kernel_rank = kernel_shape.size();
  if (kernel_rank < 1 || kernel_rank > 3) {
    return onnxruntime::common::Status(
        onnxruntime::common::ONNXRUNTIME,
        onnxruntime::common::INVALID_ARGUMENT,
        "QLinear Pooling unsupported pooling size!");
  }

  const auto* X_data = X->Data<uint8_t>();
  auto* Y_data = Y->MutableData<uint8_t>();

  // Spatial extents that do not exist for the current rank collapse to 1 so
  // the per-slice step sizes below stay valid for every rank.
  const int64_t channels = x_shape[1];
  const int64_t height = x_shape[2];
  const int64_t width = kernel_rank > 1 ? x_shape[3] : 1;
  const int64_t depth = kernel_rank > 2 ? x_shape[4] : 1;
  const int64_t pooled_height = output_dims[2];
  const int64_t pooled_width = kernel_rank > 1 ? output_dims[3] : 1;
  const int64_t pooled_depth = kernel_rank > 2 ? output_dims[4] : 1;
  const int64_t total_channels = x_shape[0] * channels;
  const int64_t x_step = height * width * depth;
  const int64_t y_step = pooled_height * pooled_width * pooled_depth;

  ThreadPool* tp = context->GetOperatorThreadPool();

  // Dequantize the whole input once so the pooling tasks can work in float.
  const int64_t x_size = x_shape.Size();
  std::vector<float> x_data_fp32(static_cast<size_t>(x_size));
  ThreadPool::TryParallelFor(
      tp, x_size, 1.0f,
      [X_data, x_scale, x_zero_point, &x_data_fp32](ptrdiff_t first, ptrdiff_t last) {
        const uint8_t* x8 = X_data + first;
        float* x32 = x_data_fp32.data() + first;
        for (ptrdiff_t i = 0, sz = last - first; i < sz; ++i) {
          x32[i] = dequantize_value(x8[i], x_scale, x_zero_point);
        }
      });

  // One work item per (batch, channel) slice; the task types are aggregates
  // initialized positionally.
  switch (kernel_rank) {
    case 1: {
      QLinearPool1DTask<uint8_t, onnxruntime::AveragePool> avg_pool_task_1d = {
          x_data_fp32.data(), Y_data, y_scale, y_zero_point, x_step, y_step,
          pooled_height, strides[0], height, kernel_shape, pads, pool_context_, pool_attrs_};
      ThreadPool::TryParallelFor(tp, total_channels, avg_pool_task_1d.Cost(), avg_pool_task_1d);
      break;
    }
    case 2: {
      QLinearPool2DTask<uint8_t, onnxruntime::AveragePool> avg_pool_task_2d = {
          x_data_fp32.data(), Y_data, y_scale, y_zero_point, x_step, y_step,
          pooled_height, pooled_width, strides[0], strides[1], height, width,
          kernel_shape, pads, pool_context_, pool_attrs_};
      ThreadPool::TryParallelFor(tp, total_channels, avg_pool_task_2d.Cost(), avg_pool_task_2d);
      break;
    }
    default: {  // kernel_rank == 3; other ranks were rejected above
      QLinearPool3DTask<uint8_t, onnxruntime::AveragePool> avg_pool_task_3d = {
          x_data_fp32.data(), Y_data, y_scale, y_zero_point, x_step, y_step,
          pooled_height, pooled_width, pooled_depth, strides[0], strides[1], strides[2],
          height, width, depth, kernel_shape, pads, pool_context_, pool_attrs_};
      ThreadPool::TryParallelFor(tp, total_channels, avg_pool_task_3d.Cost(), avg_pool_task_3d);
      break;
    }
  }

  return Status::OK();
}
// Kernel definition for QLinearAveragePool: domain kMSDomain, version 1,
// kCpuExecutionProvider, implemented by the QLinearAveragePool class.
// NOTE(review): the KernelDefBuilder carries no type constraints; the uint8
// requirement is enforced at run time inside Compute().
ONNX_OPERATOR_KERNEL_EX(QLinearAveragePool, kMSDomain, 1, kCpuExecutionProvider, KernelDefBuilder(), QLinearAveragePool);
} // namespace contrib
} // namespace onnxruntime

View file

@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/common/common.h"
#include "core/framework/op_kernel.h"
#include "core/providers/cpu/nn/pool_base.h"
namespace onnxruntime {
namespace contrib {
// CPU kernel class for the QLinearAveragePool contrib operator.
//
// Pooling attributes are parsed by the PoolBase base class; the computation
// itself lives in Compute().
class QLinearAveragePool final : public OpKernel, public PoolBase {
 public:
  // `explicit` prevents an OpKernelInfo from silently converting into a kernel.
  explicit QLinearAveragePool(const OpKernelInfo& info) : OpKernel(info), PoolBase(info) {}

  ~QLinearAveragePool() override = default;

  // Pools input 0 and writes the result to output 0.
  Status Compute(OpKernelContext* context) const override;

 private:
  // Context object forwarded to the pooling functor's Process/Finalize calls.
  PoolProcessContext pool_context_;
};
} // namespace contrib
} // namespace onnxruntime

View file

@ -106,7 +106,8 @@ class PoolBase {
protected:
PoolBase(const OpKernelInfo& info)
: op_name_(info.GetKernelDef().OpName()),
: op_name_(info.GetKernelDef().OpName().rfind("QLinear", 0) != 0 ?
info.GetKernelDef().OpName() : info.GetKernelDef().OpName().substr(7)),
pool_attrs_(info, op_name_, GetStartVersion(info)) {
}

View file

@ -0,0 +1,247 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "gtest/gtest.h"
#include "test/common/tensor_op_test_utils.h"
#include "test/providers/provider_test_utils.h"
#include "core/providers/common.h"
namespace onnxruntime {
namespace test {
// Reference dequantization: x_scale * (x - x_zero_point). The difference is
// formed in int so values below the zero point come out negative.
static inline float dequantize_u8(uint8_t x, float x_scale, uint8_t x_zero_point) {
  const int offset_from_zero = static_cast<int>(x) - x_zero_point;
  return x_scale * offset_from_zero;
}
// Reference quantization: rounds y / y_scale + y_zero_point with
// std::nearbyintf (current rounding mode) and saturates to [0, 255].
static inline uint8_t quantize_u8(float y, float y_scale, uint8_t y_zero_point) {
  const float rounded = std::nearbyintf(y / y_scale + y_zero_point);
  const float capped_high = std::min(rounded, 255.0f);
  const float saturated = std::max(0.0f, capped_high);
  return static_cast<uint8_t>(saturated);
}
// Walks every coordinate of an N-dimensional index space in row-major order
// (last dimension varies fastest).
//
// `pos_` holds the coordinate of the element that the next call to next() will
// return the linear index for; `index_` is that linear index. An empty `dims`
// describes a single element; any zero extent describes an empty space.
struct DimIterator {
  explicit DimIterator(const std::vector<int64_t>& dims) : dims_(dims) {
    size_ = std::accumulate(dims_.begin(), dims_.end(), 1LL, std::multiplies<int64_t>());
    restart();
  }

  // Rewinds to the first element. assign() is used rather than resize()
  // because resize() leaves already-present elements untouched, which would
  // keep stale coordinates when restarting in the middle of an iteration.
  void restart() {
    pos_.assign(dims_.size(), int64_t{0});
    index_ = 0LL;
  }

  // True while there are elements that next() has not yet returned.
  bool has_next() const { return index_ < size_; }

  // Returns the linear index of the current element and advances `pos_` /
  // `index_` to the following one. Returns -1 once the space is exhausted.
  int64_t next() {
    if (!has_next()) {
      return -1;
    }
    // Increment the coordinate like an odometer, carrying from the last
    // dimension towards the first.
    for (size_t i = dims_.size(); i > 0;) {
      --i;
      if (++pos_[i] < dims_[i]) {
        break;
      }
      pos_[i] = 0;
    }
    return index_++;
  }

  const std::vector<int64_t> dims_;  // extents of each dimension
  std::vector<int64_t> pos_;         // current coordinate
  int64_t size_;                     // total number of elements
  int64_t index_;                    // linear index of the current coordinate
};
// Straightforward reference implementation of quantized average pooling used
// to produce the expected output for the tests.
//
// x / y          : input and output buffers, indexed as [batch, channel, spatial...].
// x_dims / y_dims: full shapes of x and y; batch and channel are taken from y_dims.
// *_scale / *_zero_point: quantization parameters (zero points are narrowed to uint8_t).
// kernel_shape, strides: one entry per spatial dimension.
// pads           : only the first entry per spatial dimension (leading padding) is read here.
// count_include_pad: when non-zero, padded positions count towards the divisor.
//
// Vector arguments are taken by const reference so calling this helper does
// not copy them.
static void
CalculateAvgPoolNchwU8(
    uint8_t* x,
    const std::vector<int64_t>& x_dims,
    float x_scale,
    int x_zero_point,
    uint8_t* y,
    const std::vector<int64_t>& y_dims,
    float y_scale,
    int y_zero_point,
    const std::vector<int64_t>& kernel_shape,
    const std::vector<int64_t>& strides,
    const std::vector<int64_t>& pads,
    const int64_t count_include_pad) {
  const int64_t batch = y_dims[0];
  const int64_t channel = y_dims[1];
  const std::vector<int64_t> y_img_dims(y_dims.begin() + 2, y_dims.end());
  const std::vector<int64_t> x_img_dims(x_dims.begin() + 2, x_dims.end());

  // Row-major element strides of the input image. The loop counts down with
  // `i-- > 1` so it cannot wrap around when there are no spatial dimensions.
  std::vector<int64_t> x_img_strides(x_img_dims.size(), 1LL);
  for (size_t i = x_img_dims.size(); i-- > 1;) {
    x_img_strides[i - 1] = x_img_strides[i] * x_img_dims[i];
  }

  const int64_t y_step = std::accumulate(y_img_dims.begin(), y_img_dims.end(), 1LL, std::multiplies<int64_t>());
  const int64_t x_step = std::accumulate(x_img_dims.begin(), x_img_dims.end(), 1LL, std::multiplies<int64_t>());

  const uint8_t x_zp = static_cast<uint8_t>(x_zero_point);
  const uint8_t y_zp = static_cast<uint8_t>(y_zero_point);

  // Scratch buffer reused for every output element.
  std::vector<int64_t> kernel_topleft(y_img_dims.size(), 0);

  for (int64_t b = 0; b < batch; ++b) {
    for (int64_t c = 0; c < channel; ++c) {
      uint8_t* ybc = y + (b * channel + c) * y_step;
      const uint8_t* xbc = x + (b * channel + c) * x_step;

      DimIterator yit(y_img_dims);
      while (yit.has_next()) {
        // Window origin in padded input coordinates. yit.pos_ must be read
        // before yit.next() advances it.
        for (size_t i = 0; i < y_img_dims.size(); ++i) {
          kernel_topleft[i] = yit.pos_[i] * strides[i];
        }

        float y_value_sum = 0.0f;
        int count = 0;
        for (DimIterator kit(kernel_shape); kit.has_next(); kit.next()) {
          // Linear offset of this kernel element inside the input image, or a
          // negative value if the element falls into the padding.
          int64_t kernel_offset = 0;
          for (size_t i = 0; kernel_offset >= 0 && i < kernel_shape.size(); ++i) {
            const int64_t x_real_dim = kernel_topleft[i] + kit.pos_[i] - pads[i];
            if (x_real_dim >= 0 && x_real_dim < x_img_dims[i]) {
              kernel_offset += x_real_dim * x_img_strides[i];
            } else {
              kernel_offset = -1LL;  // padding element
            }
          }

          if (kernel_offset >= 0) {
            y_value_sum += dequantize_u8(xbc[kernel_offset], x_scale, x_zp);
            ++count;
          } else if (count_include_pad) {
            ++count;
          }
        }

        const int64_t y_offset = yit.next();
        ybc[y_offset] = quantize_u8(y_value_sum / count, y_scale, y_zp);
      }
    }
  }
}
void RunQLinearAveragePoolNchwU8(
const std::vector<int64_t> x_dims,
const std::vector<int64_t> y_dims,
const std::vector<int64_t> kernel_shape,
const std::vector<int64_t> strides,
const std::vector<int64_t> pads,
const int64_t count_include_pad = 0) {
float x_scale = 1.0f / 255.0f;
uint8_t x_zero_point = 128;
RandomValueGenerator random{};
std::vector<float> x_data_fp32 = random.Uniform<float>(x_dims, -0.5f, 0.5f);
std::vector<uint8_t> x_data(x_data_fp32.size());
for (size_t i = 0; i < x_data.size(); ++i) {
x_data[i] = quantize_u8(x_data_fp32[i], x_scale, x_zero_point);
}
float y_scale = 1.0f / 255.0f;
uint8_t y_zero_point = 100;
int64_t y_size = std::accumulate(y_dims.begin(), y_dims.end(), 1LL, std::multiplies<int64_t>());
std::vector<uint8_t> y_data(y_size);
CalculateAvgPoolNchwU8(
x_data.data(), x_dims, x_scale, x_zero_point,
y_data.data(), y_dims, y_scale, y_zero_point,
kernel_shape, strides, pads, count_include_pad);
OpTester test("QLinearAveragePool", 1, onnxruntime::kMSDomain);
test.AddAttribute("auto_pad", "");
test.AddAttribute("strides", strides);
test.AddAttribute("pads", pads);
test.AddAttribute("kernel_shape", kernel_shape);
test.AddAttribute("count_include_pad", count_include_pad);
test.AddInput<uint8_t>("X", x_dims, x_data);
test.AddInput<float>("x_scale", {}, {x_scale});
test.AddInput<uint8_t>("x_zero_point", {}, {x_zero_point});
test.AddInput<float>("y_scale", {}, {y_scale});
test.AddInput<uint8_t>("y_zero_point", {}, {y_zero_point});
test.AddOutput<uint8_t>("Y", y_dims, y_data);
auto q8checker = [&](const std::vector<OrtValue>& fetches, const std::string& provider_type) {
const OrtValue& ort_value = fetches[0];
if (ort_value.Fence()) {
ort_value.Fence()->BeforeUsingAsInput(onnxruntime::kCpuExecutionProvider, 0);
}
auto y_shape = TensorShape(y_dims);
const Tensor& output_tensor = ort_value.Get<Tensor>();
ORT_ENFORCE(y_shape == output_tensor.Shape(),
"Expected output shape [" + y_shape.ToString() + "] did not match run output shape [" +
output_tensor.Shape().ToString() + "] for Y @" + provider_type);
auto* output = output_tensor.Data<uint8_t>();
auto size = static_cast<int>(output_tensor.Shape().Size());
for (int i = 0; i < size; ++i) {
int diff = abs(y_data[i] - output[i]);
EXPECT_LE(diff, 1) << "i:" << i << " expected:" << y_data[i] << " " << (int)y_data[i]
<< ", got:" << output[i] << " " << (int)output[i] << ", provider_type: " << provider_type;
}
};
test.SetCustomOutputVerifier(q8checker);
test.Run();
}
// 1-D pooling with asymmetric padding, count_include_pad = 0.
// Output length: (5 + 1 + 2 - 3) / 1 + 1 = 6, taking pads as {begin, end}
// (the reference helper reads only the leading value).
TEST(QLinearPoolTest, AveragePool1D_ExcludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5}, // x shape
{1, 1, 6}, // expected y shape
{3}, // kernel shape
{1}, // strides
{1, 2}, // pads
0); // count_include_pad
}
// Same 1-D configuration as the ExcludePadPixel case, but with
// count_include_pad = 1 so padded positions count towards the divisor.
TEST(QLinearPoolTest, AveragePool1D_IncludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5}, // x shape
{1, 1, 6}, // expected y shape
{3}, // kernel shape
{1}, // strides
{1, 2}, // pads
1); // count_include_pad
}
// 2-D pooling with different kernel size, stride and padding per axis,
// count_include_pad = 0. Taking pads as {h_begin, w_begin, h_end, w_end}:
// height (5 + 1 + 2 - 3) / 1 + 1 = 6, width (7 + 3 + 1 - 4) / 2 + 1 = 4.
TEST(QLinearPoolTest, AveragePool2D_ExcludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5, 7}, // x shape
{1, 1, 6, 4}, // expected y shape
{3, 4}, // kernel shape
{1, 2}, // strides
{1, 3, 2, 1}, // pads
0); // count_include_pad
}
// Same 2-D configuration as the ExcludePadPixel case, but with
// count_include_pad = 1 so padded positions count towards the divisor.
TEST(QLinearPoolTest, AveragePool2D_IncludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5, 7}, // x shape
{1, 1, 6, 4}, // expected y shape
{3, 4}, // kernel shape
{1, 2}, // strides
{1, 3, 2, 1}, // pads
1); // count_include_pad
}
// 3-D pooling with different kernel size, stride and padding per axis,
// count_include_pad = 0. Taking pads as three begin values followed by three
// end values: (5 + 1 + 2 - 3) / 1 + 1 = 6, (7 + 3 + 1 - 4) / 2 + 1 = 4,
// (9 + 2 + 2 - 5) / 3 + 1 = 3.
TEST(QLinearPoolTest, AveragePool3D_ExcludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5, 7, 9}, // x shape
{1, 1, 6, 4, 3}, // expected y shape
{3, 4, 5}, // kernel shape
{1, 2, 3}, // strides
{1, 3, 2, 2, 1, 2}, // pads
0); // count_include_pad
}
// Same 3-D configuration as the ExcludePadPixel case, but with
// count_include_pad = 1 so padded positions count towards the divisor.
TEST(QLinearPoolTest, AveragePool3D_IncludePadPixel) {
RunQLinearAveragePoolNchwU8(
{1, 1, 5, 7, 9}, // x shape
{1, 1, 6, 4, 3}, // expected y shape
{3, 4, 5}, // kernel shape
{1, 2, 3}, // strides
{1, 3, 2, 2, 1, 2}, // pads
1); // count_include_pad
}
} // namespace test
} // namespace onnxruntime