mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
cpu support of LpPool(18) (#14205)
Signed-off-by: Liqun Fu <liqfu@microsoft.com> ### Description To support LpPool (18) ### Motivation and Context for Ort 1.14 release Signed-off-by: Liqun Fu <liqfu@microsoft.com>
This commit is contained in:
parent
edb377f2cb
commit
2b1a59f01a
7 changed files with 417 additions and 5 deletions
|
|
@ -175,7 +175,8 @@ Do not modify directly.*
|
|||
|||[11, 12]|**B** = tensor(bool)<br/> **I** = tensor(int64)<br/> **V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|||[1, 10]|**B** = tensor(bool)<br/> **I** = tensor(int64)<br/> **V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|LpNormalization|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(double), tensor(float)|
|
||||
|LpPool|*in* X:**T**<br> *out* Y:**T**|11+|**T** = tensor(float)|
|
||||
|LpPool|*in* X:**T**<br> *out* Y:**T**|18+|**T** = tensor(float)|
|
||||
|||[11, 17]|**T** = tensor(float)|
|
||||
|||[2, 10]|**T** = tensor(float)|
|
||||
|MatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
|
||||
|||[9, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
|
||||
|
|
|
|||
|
|
@ -439,7 +439,7 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, NonMaxSuppression);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, AveragePool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MaxUnpool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, LpPool);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 17, LpPool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Conv);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, ConvTranspose);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, If);
|
||||
|
|
@ -830,6 +830,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
|
|||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, LpPool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd);
|
||||
|
|
@ -1471,7 +1472,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
|
|||
NonMaxSuppression)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, AveragePool)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MaxUnpool)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, LpPool)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 17, LpPool)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Conv)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, ConvTranspose)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, If)>,
|
||||
|
|
@ -2164,6 +2165,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
|
|||
ReduceSumSquare)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double,
|
||||
ReduceSumSquare)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, LpPool)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd)>,
|
||||
|
|
|
|||
|
|
@ -249,6 +249,81 @@ Status MaxPoolV8::ComputeImpl(OpKernelContext* context) const {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Status LpPoolV18<T>::Compute(OpKernelContext* context) const {
|
||||
concurrency::ThreadPool* tp = context->GetOperatorThreadPool();
|
||||
bool need_dilation = false;
|
||||
for (auto n : pool_attrs_.dilations) {
|
||||
need_dilation |= n > 1;
|
||||
}
|
||||
|
||||
const auto* X = context->Input<Tensor>(0);
|
||||
const TensorShape& x_shape = X->Shape();
|
||||
|
||||
ORT_RETURN_IF_NOT(x_shape.NumDimensions() >= 3, "Input dimension cannot be less than 3.");
|
||||
|
||||
auto pads = pool_attrs_.pads;
|
||||
auto kernel_shape = pool_attrs_.kernel_shape;
|
||||
|
||||
auto output_dims = pool_attrs_.SetOutputSize(x_shape, x_shape[1], &pads);
|
||||
Tensor* Y = context->Output(0, output_dims);
|
||||
|
||||
const auto* X_data = X->Data<T>();
|
||||
auto* Y_data = Y->MutableData<T>();
|
||||
|
||||
// The main loop
|
||||
int64_t channels = x_shape[1];
|
||||
int64_t height = x_shape[2];
|
||||
int64_t width = kernel_shape.size() > 1 ? x_shape[3] : 1;
|
||||
int64_t depth = kernel_shape.size() > 2 ? x_shape[4] : 1;
|
||||
int64_t pooled_height = output_dims[2];
|
||||
int64_t pooled_width = kernel_shape.size() > 1 ? output_dims[3] : 1;
|
||||
int64_t pooled_depth = kernel_shape.size() > 2 ? output_dims[4] : 1;
|
||||
const int64_t total_channels = x_shape[0] * channels;
|
||||
|
||||
switch (kernel_shape.size()) {
|
||||
case 1: {
|
||||
int64_t x_step = height;
|
||||
int64_t y_step = pooled_height;
|
||||
const int64_t dilation_h = pool_attrs_.dilations[0];
|
||||
|
||||
RunLoop<LpPool1DTask<T>>(tp, onnxruntime::narrow<size_t>(total_channels),
|
||||
{X_data, Y_data, x_step, y_step, dilation_h, pooled_height, stride_h(),
|
||||
height, kernel_shape, pads, p_});
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: {
|
||||
int64_t x_step = height * width;
|
||||
int64_t y_step = pooled_height * pooled_width;
|
||||
const int64_t dilation_h = pool_attrs_.dilations[0];
|
||||
const int64_t dilation_w = pool_attrs_.dilations[1];
|
||||
RunLoop<LpPool2DTask<T>>(
|
||||
tp, onnxruntime::narrow<size_t>(total_channels),
|
||||
{X_data, Y_data, x_step, y_step, dilation_h, dilation_w, pooled_height, pooled_width, stride_h(),
|
||||
stride_w(), height, width, kernel_shape, pads, p_});
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
int64_t x_step = height * width * depth;
|
||||
int64_t y_step = pooled_height * pooled_width * pooled_depth;
|
||||
const int64_t dilation_h = pool_attrs_.dilations[0];
|
||||
const int64_t dilation_w = pool_attrs_.dilations[1];
|
||||
const int64_t dilation_d = pool_attrs_.dilations[2];
|
||||
RunLoop<LpPool3DTask<T>>(tp, onnxruntime::narrow<size_t>(total_channels),
|
||||
{X_data, Y_data, x_step, y_step,
|
||||
dilation_h, dilation_w, dilation_d, pooled_height, pooled_width,
|
||||
pooled_depth, stride_h(), stride_w(), stride_d(), height,
|
||||
width, depth, kernel_shape, pads, p_});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported kernel dimension : " + std::to_string(kernel_shape.size()));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 7, 9,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, AveragePool>);
|
||||
|
|
@ -284,8 +359,16 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 2, 10,
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, LpPool>);
|
||||
|
||||
ONNX_CPU_OPERATOR_KERNEL(LpPool, 11, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, LpPool>);
|
||||
// CPU registration of LpPool for opsets 11 through 17: float tensors, served by the generic
// Pool<float, LpPool> kernel.
ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
    LpPool, 11, 17,
    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
    Pool<float, LpPool>);

// CPU registration of LpPool starting at opset 18: float tensors, served by LpPoolV18<float>.
ONNX_CPU_OPERATOR_KERNEL(
    LpPool, 18,
    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
    LpPoolV18<float>);
|
||||
|
||||
ONNX_CPU_OPERATOR_KERNEL(GlobalLpPool, 2, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, LpPool>);
|
||||
|
|
|
|||
|
|
@ -46,4 +46,20 @@ class MaxPoolV8 : public OpKernel, public PoolBase {
|
|||
template <typename T>
|
||||
Status ComputeImpl(OpKernelContext* context) const;
|
||||
};
|
||||
|
||||
// For lppool v18 and beyond
|
||||
// version 18: Added ceil_mode and dilations
|
||||
template <typename T>
|
||||
class LpPoolV18 : public OpKernel, public PoolBase {
|
||||
public:
|
||||
LpPoolV18(const OpKernelInfo& info) : OpKernel(info), PoolBase(info) {
|
||||
ORT_ENFORCE(info.GetAttr<int64_t>("p", &p_).IsOK());
|
||||
}
|
||||
|
||||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
private:
|
||||
int64_t p_;
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -377,4 +377,170 @@ struct MaxPool3DTask {
|
|||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LpPool1DTask final {
|
||||
const T* X_data;
|
||||
T* Y_data;
|
||||
int64_t x_step;
|
||||
int64_t y_step;
|
||||
int64_t dilation_h;
|
||||
int64_t pooled_height;
|
||||
int64_t stride_h;
|
||||
int64_t height;
|
||||
gsl::span<const int64_t> kernel_shape;
|
||||
gsl::span<const int64_t> pads;
|
||||
int64_t p;
|
||||
TensorOpCost Cost() {
|
||||
double loop_count = static_cast<double>(pooled_height * kernel_shape[0]);
|
||||
return TensorOpCost{loop_count, loop_count, loop_count};
|
||||
}
|
||||
|
||||
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
|
||||
for (std::ptrdiff_t c = begin; c < end; ++c) {
|
||||
operator()(c);
|
||||
}
|
||||
}
|
||||
void operator()(std::ptrdiff_t c) const {
|
||||
const T* x_d = X_data + c * x_step;
|
||||
T* y_d = Y_data + c * y_step;
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h - pads[0];
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
y_d[ph] = 0;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
y_d[ph] += static_cast<T>(std::pow(std::abs(x_d[h]), p));
|
||||
}
|
||||
}
|
||||
y_d[ph] = static_cast<T>(std::pow(y_d[ph], 1.0f / p));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LpPool2DTask final {
|
||||
const T* X_data;
|
||||
T* Y_data;
|
||||
int64_t x_step;
|
||||
int64_t y_step;
|
||||
int64_t dilation_h;
|
||||
int64_t dilation_w;
|
||||
int64_t pooled_height;
|
||||
int64_t pooled_width;
|
||||
int64_t stride_h;
|
||||
int64_t stride_w;
|
||||
int64_t height;
|
||||
int64_t width;
|
||||
gsl::span<const int64_t> kernel_shape;
|
||||
gsl::span<const int64_t> pads;
|
||||
int64_t p;
|
||||
|
||||
TensorOpCost Cost() {
|
||||
double loop_count = static_cast<double>(pooled_height * pooled_width * kernel_shape[0] * kernel_shape[1]);
|
||||
return TensorOpCost{loop_count, loop_count, loop_count};
|
||||
}
|
||||
|
||||
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
|
||||
for (std::ptrdiff_t c = begin; c < end; ++c) {
|
||||
operator()(c);
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(std::ptrdiff_t c) const {
|
||||
const T* x_d = X_data + c * x_step;
|
||||
T* y_d = Y_data + c * y_step;
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h - pads[0];
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
for (int64_t pw = 0; pw < pooled_width; ++pw) {
|
||||
int64_t wstart = pw * stride_w - pads[1];
|
||||
int64_t wend = wstart + kernel_shape[1] * dilation_w;
|
||||
const int64_t pool_index = ph * pooled_width + pw;
|
||||
y_d[pool_index] = 0;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
|
||||
const int64_t input_index = h * width + w;
|
||||
y_d[pool_index] += static_cast<T>(std::pow(std::abs(x_d[input_index]), p));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
y_d[pool_index] = static_cast<T>(std::pow(y_d[pool_index], 1.0f / p));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LpPool3DTask {
|
||||
const T* X_data;
|
||||
T* Y_data;
|
||||
int64_t x_step;
|
||||
int64_t y_step;
|
||||
int64_t dilation_h;
|
||||
int64_t dilation_w;
|
||||
int64_t dilation_d;
|
||||
int64_t pooled_height;
|
||||
int64_t pooled_width;
|
||||
int64_t pooled_depth;
|
||||
int64_t stride_h;
|
||||
int64_t stride_w;
|
||||
int64_t stride_d;
|
||||
int64_t height;
|
||||
int64_t width;
|
||||
int64_t depth;
|
||||
gsl::span<const int64_t> kernel_shape;
|
||||
gsl::span<const int64_t> pads;
|
||||
int64_t p;
|
||||
|
||||
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
|
||||
for (std::ptrdiff_t c = begin; c < end; ++c) {
|
||||
operator()(c);
|
||||
}
|
||||
}
|
||||
|
||||
TensorOpCost Cost() {
|
||||
double loop_count = static_cast<double>(pooled_height * pooled_width * pooled_depth * kernel_shape[0] *
|
||||
kernel_shape[1] * kernel_shape[2]);
|
||||
return TensorOpCost{loop_count, loop_count, loop_count};
|
||||
}
|
||||
|
||||
void operator()(std::ptrdiff_t c) const {
|
||||
const T* x_d = X_data + c * x_step;
|
||||
T* y_d = Y_data + c * y_step;
|
||||
|
||||
for (int64_t ph = 0; ph < pooled_height; ++ph) {
|
||||
int64_t hstart = ph * stride_h - pads[0];
|
||||
int64_t hend = hstart + kernel_shape[0] * dilation_h;
|
||||
for (int64_t pw = 0; pw < pooled_width; ++pw) {
|
||||
int64_t wstart = pw * stride_w - pads[1];
|
||||
int64_t wend = wstart + kernel_shape[1] * dilation_w;
|
||||
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
|
||||
int64_t dstart = pd * stride_d - pads[2];
|
||||
int64_t dend = dstart + kernel_shape[2] * dilation_d;
|
||||
const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
|
||||
y_d[pool_index] = 0;
|
||||
for (int64_t h = hstart; h < hend; h += dilation_h) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(h, height)) {
|
||||
for (int64_t w = wstart; w < wend; w += dilation_w) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(w, width)) {
|
||||
for (int64_t d = dstart; d < dend; d += dilation_d) {
|
||||
if (math::is_a_ge_zero_and_a_lt_b(d, depth)) {
|
||||
const int64_t input_index = h * width * depth + w * depth + d;
|
||||
y_d[pool_index] += static_cast<T>(std::pow(std::abs(x_d[input_index]), p));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
y_d[pool_index] = static_cast<T>(std::pow(y_d[pool_index], 1.0f / p));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
62
onnxruntime/test/providers/cpu/nn/lp_pool_test_generator.py
Normal file
62
onnxruntime/test/providers/cpu/nn/lp_pool_test_generator.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
|
||||
# use this code to generate test data for PoolTest.LpPool1d and PoolTest.LpPool2d
|
||||
def generate_lppool_1d_test_cases() -> None:
    """Print PyTorch LPPool1d reference results for a fixed 1-D input.

    The input holds the values 1..4 as a float32 array of shape (1, 1, 4) and the norm
    degree is 2. For every kernel size in (2, 3) and stride in ([1], [2]) the function
    prints the kernel size, the stride, the flattened pooling result and its shape.
    """
    norm_degree = 2
    # Values 1, 2, 3, 4 laid out as a (1, 1, 4) float32 array.
    data = np.arange(1, 5).reshape(1, 1, 4).astype(np.float32)

    print(data)
    for kernel_size in (2, 3):
        for stride in ([1], [2]):
            print(kernel_size)
            print(stride)
            pool = nn.LPPool1d(norm_degree, kernel_size=kernel_size, stride=stride)
            result = pool(torch.from_numpy(data))
            print(torch.flatten(result))
            print(result.shape)
|
||||
|
||||
|
||||
def generate_lppool_2d_test_cases() -> None:
    """Print PyTorch LPPool2d reference results for a fixed 2-D input.

    The input holds the values 1..16 row by row as a float32 array of shape (1, 1, 4, 4)
    and the norm degree is 2. For every kernel size in ([2, 2], [3, 3]) and stride in
    ([1, 1], [2, 2]) the function prints the kernel size, the stride, the flattened
    pooling result and its shape.
    """
    norm_degree = 2
    # Values 1..16 laid out row by row as a (1, 1, 4, 4) float32 array.
    data = np.arange(1, 17).reshape(1, 1, 4, 4).astype(np.float32)

    print(data)
    for kernel_size in ([2, 2], [3, 3]):
        for stride in ([1, 1], [2, 2]):
            pool = nn.LPPool2d(norm_degree, kernel_size=kernel_size, stride=stride)
            result = pool(torch.from_numpy(data))
            print(kernel_size)
            print(stride)
            print(torch.flatten(result))
            print(result.shape)
|
||||
|
||||
|
||||
# Executed at module level: prints the 1-D reference data first, then the 2-D reference data.
generate_lppool_1d_test_cases()
|
||||
generate_lppool_2d_test_cases()
|
||||
|
|
@ -1331,6 +1331,88 @@ TEST(PoolTest, LpPool) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
// test data generated with lp_pool_test_generator.py
|
||||
TEST(PoolTest, LpPool1d) {
|
||||
std::vector<int64_t> kernel_sizes[2] = {{2}, {3}};
|
||||
std::vector<int64_t> strides[2] = {{1}, {2}};
|
||||
std::vector<float> ys[4] = {
|
||||
{2.2361f, 3.6056f, 5.0000f},
|
||||
{2.2361f, 5.0000f},
|
||||
{3.7417f, 5.3852f},
|
||||
{3.7417f}};
|
||||
std::vector<int64_t> y_sizes[4] = {
|
||||
{1, 1, 3},
|
||||
{1, 1, 2},
|
||||
{1, 1, 2},
|
||||
{1, 1, 1},
|
||||
};
|
||||
int y_count = 0;
|
||||
for (int kernel_size_count = 0; kernel_size_count < 2; kernel_size_count++)
|
||||
for (int stride_count = 0; stride_count < 2; stride_count++) {
|
||||
OpTester test("LpPool", 18);
|
||||
test.AddAttribute("auto_pad", "");
|
||||
test.AddAttribute("p", static_cast<int64_t>(2));
|
||||
test.AddInput<float>("X", {1, 1, 4}, {1, 2, 3, 4});
|
||||
test.AddAttribute("strides", strides[stride_count]);
|
||||
test.AddAttribute("kernel_shape", kernel_sizes[kernel_size_count]);
|
||||
|
||||
test.AddOutput<float>("Y", y_sizes[y_count], ys[y_count]);
|
||||
|
||||
// https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_network_definition.html#a94f434942252e6d98ac17705c06ce060
|
||||
// TensorRT does not support 1d pooling
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
y_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// test data generated with lp_pool_test_generator.py
|
||||
TEST(PoolTest, LpPool2d) {
|
||||
std::vector<int64_t> kernel_sizes[2] = {{2, 2}, {3, 3}};
|
||||
std::vector<int64_t> strides[2] = {{1, 1}, {2, 2}};
|
||||
std::vector<float> ys[4] = {
|
||||
{8.1240f, 9.8995f, 11.7473f, 15.5563f, 17.4929f, 19.4422f, 23.3666f, 25.3377f, 27.3130f},
|
||||
{8.1240f, 11.7473f, 23.3666f, 27.3130f},
|
||||
{20.6398f, 23.3024f, 31.6544f, 34.5109f},
|
||||
{20.6398f}};
|
||||
std::vector<int64_t> y_sizes[4] = {
|
||||
{1, 1, 3, 3},
|
||||
{1, 1, 2, 2},
|
||||
{1, 1, 2, 2},
|
||||
{1, 1, 1, 1},
|
||||
};
|
||||
int y_count = 0;
|
||||
for (int kernel_size_count = 0; kernel_size_count < 2; kernel_size_count++)
|
||||
for (int stride_count = 0; stride_count < 2; stride_count++) {
|
||||
OpTester test("LpPool", 18);
|
||||
test.AddAttribute("auto_pad", "");
|
||||
test.AddAttribute("p", static_cast<int64_t>(2));
|
||||
test.AddInput<float>("X", {1, 1, 4, 4},
|
||||
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
test.AddAttribute("strides", strides[stride_count]);
|
||||
test.AddAttribute("kernel_shape", kernel_sizes[kernel_size_count]);
|
||||
|
||||
test.AddOutput<float>("Y", y_sizes[y_count], ys[y_count]);
|
||||
test.Run();
|
||||
y_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Checks LpPool (opset 18) with ceil_mode = 1 on a 1-D input.
// With kernel 3, stride 2 and p = 1 over {1, 2, 3, 4} the expected output has two elements:
// 1 + 2 + 3 = 6 and, for the window running past the end of the input, 3 + 4 = 7.
TEST(PoolTest, LpPoolCeilMode) {
  OpTester test("LpPool", 18);

  test.AddAttribute("auto_pad", "");
  test.AddAttribute("strides", std::vector<int64_t>{2});
  // Qualified as std::vector, matching the "strides" line, so the test does not rely on a
  // using-directive.
  test.AddAttribute("kernel_shape", std::vector<int64_t>{3});
  test.AddAttribute("ceil_mode", static_cast<int64_t>(1));
  test.AddAttribute("p", static_cast<int64_t>(1));
  test.AddInput<float>("X", {1, 1, 4}, {1, 2, 3, 4});
  test.AddOutput<float>("Y", {1, 1, 2}, {6, 7});

  // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_network_definition.html#a94f434942252e6d98ac17705c06ce060
  // TensorRT does not support 1d pooling
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
|
||||
|
||||
TEST(PoolTest, GlobalLpPool) {
|
||||
OpTester test("GlobalLpPool");
|
||||
test.AddAttribute("p", static_cast<int64_t>(3));
|
||||
|
|
|
|||
Loading…
Reference in a new issue