diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index ad571dacb2..41c0da6fa0 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -175,7 +175,8 @@ Do not modify directly.*
|||[11, 12]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
|||[1, 10]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
|LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(double), tensor(float)|
-|LpPool|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(float)|
+|LpPool|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(float)|
+|||[11, 17]|**T** = tensor(float)|
|||[2, 10]|**T** = tensor(float)|
|MatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
|||[9, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 75060fbf9f..caba009075 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -439,7 +439,7 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, NonMaxSuppression);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, AveragePool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MaxUnpool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, LpPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 17, LpPool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Conv);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, ConvTranspose);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, If);
@@ -830,6 +830,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, LpPool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd);
@@ -1471,7 +1472,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
NonMaxSuppression)>,
BuildKernelCreateInfo,
BuildKernelCreateInfo,
- BuildKernelCreateInfo,
+ BuildKernelCreateInfo,
BuildKernelCreateInfo,
BuildKernelCreateInfo,
BuildKernelCreateInfo,
@@ -2164,6 +2165,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
ReduceSumSquare)>,
BuildKernelCreateInfo,
+ BuildKernelCreateInfo,
BuildKernelCreateInfo,
BuildKernelCreateInfo,
BuildKernelCreateInfo,
diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc
index 53c24e7e3d..0f3681d64c 100644
--- a/onnxruntime/core/providers/cpu/nn/pool.cc
+++ b/onnxruntime/core/providers/cpu/nn/pool.cc
@@ -249,6 +249,81 @@ Status MaxPoolV8::ComputeImpl(OpKernelContext* context) const {
return Status::OK();
}
+template
+Status LpPoolV18::Compute(OpKernelContext* context) const {  // Lp-pools input 0 into output 0 for 1-D, 2-D or 3-D kernels.
+ concurrency::ThreadPool* tp = context->GetOperatorThreadPool();  // thread pool handed to RunLoop below
+ bool need_dilation = false;  // NOTE(review): computed here but never read later in this function
+ for (auto n : pool_attrs_.dilations) {
+ need_dilation |= n > 1;
+ }
+
+ const auto* X = context->Input(0);
+ const TensorShape& x_shape = X->Shape();
+
+ ORT_RETURN_IF_NOT(x_shape.NumDimensions() >= 3, "Input dimension cannot be less than 3.");
+
+ auto pads = pool_attrs_.pads;  // local `auto` copy; its address goes to SetOutputSize, which presumably may adjust it
+ auto kernel_shape = pool_attrs_.kernel_shape;
+
+ auto output_dims = pool_attrs_.SetOutputSize(x_shape, x_shape[1], &pads);
+ Tensor* Y = context->Output(0, output_dims);
+
+ const auto* X_data = X->Data();
+ auto* Y_data = Y->MutableData();
+
+ // Gather per-plane extents, then dispatch on the kernel rank (1, 2 or 3 spatial dimensions).
+ int64_t channels = x_shape[1];
+ int64_t height = x_shape[2];
+ int64_t width = kernel_shape.size() > 1 ? x_shape[3] : 1;  // NOTE(review): indexed from kernel rank alone; input rank is only checked to be >= 3 above
+ int64_t depth = kernel_shape.size() > 2 ? x_shape[4] : 1;  // unused dimensions default to 1
+ int64_t pooled_height = output_dims[2];
+ int64_t pooled_width = kernel_shape.size() > 1 ? output_dims[3] : 1;
+ int64_t pooled_depth = kernel_shape.size() > 2 ? output_dims[4] : 1;
+ const int64_t total_channels = x_shape[0] * channels;  // number of independent planes; each task call handles one plane index
+
+ switch (kernel_shape.size()) {
+ case 1: {
+ int64_t x_step = height;  // elements per input plane
+ int64_t y_step = pooled_height;  // elements per output plane
+ const int64_t dilation_h = pool_attrs_.dilations[0];
+
+ RunLoop>(tp, onnxruntime::narrow(total_channels),
+ {X_data, Y_data, x_step, y_step, dilation_h, pooled_height, stride_h(),
+ height, kernel_shape, pads, p_});  // brace list follows the member order of LpPool1DTask
+ break;
+ }
+
+ case 2: {
+ int64_t x_step = height * width;  // elements per input plane
+ int64_t y_step = pooled_height * pooled_width;  // elements per output plane
+ const int64_t dilation_h = pool_attrs_.dilations[0];
+ const int64_t dilation_w = pool_attrs_.dilations[1];
+ RunLoop>(
+ tp, onnxruntime::narrow(total_channels),
+ {X_data, Y_data, x_step, y_step, dilation_h, dilation_w, pooled_height, pooled_width, stride_h(),
+ stride_w(), height, width, kernel_shape, pads, p_});  // brace list follows the member order of LpPool2DTask
+ break;
+ }
+ case 3: {
+ int64_t x_step = height * width * depth;  // elements per input plane
+ int64_t y_step = pooled_height * pooled_width * pooled_depth;  // elements per output plane
+ const int64_t dilation_h = pool_attrs_.dilations[0];
+ const int64_t dilation_w = pool_attrs_.dilations[1];
+ const int64_t dilation_d = pool_attrs_.dilations[2];
+ RunLoop>(tp, onnxruntime::narrow(total_channels),
+ {X_data, Y_data, x_step, y_step,
+ dilation_h, dilation_w, dilation_d, pooled_height, pooled_width,
+ pooled_depth, stride_h(), stride_w(), stride_d(), height,
+ width, depth, kernel_shape, pads, p_});  // brace list follows the member order of LpPool3DTask
+ break;
+ }
+ default:  // any other kernel rank is reported as INVALID_ARGUMENT
+ return Status(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported kernel dimension : " + std::to_string(kernel_shape.size()));
+ }
+
+ return Status::OK();
+}
+
ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 7, 9,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()),
Pool);
@@ -284,8 +359,16 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 2, 10,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()),
Pool);
-ONNX_CPU_OPERATOR_KERNEL(LpPool, 11, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()),
- Pool);
+ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 11, 17,  // opsets 11 through 17 stay on the existing Pool kernel
+ KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()),
+ Pool);
+
+ONNX_CPU_OPERATOR_KERNEL(LpPool, 18,  // opset 18 onward is served by the new LpPoolV18 kernel
+ KernelDefBuilder()
+ .TypeConstraint(
+ "T",
+ DataTypeImpl::GetTensorType()),
+ LpPoolV18);
ONNX_CPU_OPERATOR_KERNEL(GlobalLpPool, 2, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()),
Pool);
diff --git a/onnxruntime/core/providers/cpu/nn/pool.h b/onnxruntime/core/providers/cpu/nn/pool.h
index 5458c5ba6c..7e4899ea1d 100644
--- a/onnxruntime/core/providers/cpu/nn/pool.h
+++ b/onnxruntime/core/providers/cpu/nn/pool.h
@@ -46,4 +46,20 @@ class MaxPoolV8 : public OpKernel, public PoolBase {
template
Status ComputeImpl(OpKernelContext* context) const;
};
+
+// CPU kernel for the LpPool operator, opset 18 and later.
+// Opset 18 added the ceil_mode and dilations attributes.
+template
+class LpPoolV18 : public OpKernel, public PoolBase {
+ public:
+ LpPoolV18(const OpKernelInfo& info) : OpKernel(info), PoolBase(info) {  // pooling attributes are handled by PoolBase
+ ORT_ENFORCE(info.GetAttr("p", &p_).IsOK());  // kernel construction fails if "p" cannot be read
+ }
+
+ Status Compute(OpKernelContext* context) const override;
+
+private:
+ int64_t p_;  // exponent of the Lp norm, filled from the "p" attribute in the constructor
+};
+
} // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/nn/pool_functors.h b/onnxruntime/core/providers/cpu/nn/pool_functors.h
index 19ffb8f2d5..9948e1d809 100644
--- a/onnxruntime/core/providers/cpu/nn/pool_functors.h
+++ b/onnxruntime/core/providers/cpu/nn/pool_functors.h
@@ -377,4 +377,170 @@ struct MaxPool3DTask {
}
};
+template
+struct LpPool1DTask final {  // Per-plane functor for 1-D Lp pooling: y = (sum of |x|^p over the window)^(1/p).
+ const T* X_data;  // input base pointer; plane c starts at X_data + c * x_step
+ T* Y_data;  // output base pointer; plane c starts at Y_data + c * y_step
+ int64_t x_step;
+ int64_t y_step;
+ int64_t dilation_h;  // distance between consecutive taps of one window
+ int64_t pooled_height;  // number of output positions per plane
+ int64_t stride_h;
+ int64_t height;  // input length per plane; taps outside [0, height) are skipped
+ gsl::span kernel_shape;
+ gsl::span pads;  // only pads[0] is read by this functor
+ int64_t p;  // exponent of the norm
+ TensorOpCost Cost() {  // per-plane cost estimate: one unit per (output position, kernel tap) pair
+ double loop_count = static_cast(pooled_height * kernel_shape[0]);
+ return TensorOpCost{loop_count, loop_count, loop_count};  // the same count is used for all three fields
+ }
+
+ void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {  // processes planes in [begin, end)
+ for (std::ptrdiff_t c = begin; c < end; ++c) {
+ operator()(c);
+ }
+ }
+ void operator()(std::ptrdiff_t c) const {  // processes the single plane with index c
+ const T* x_d = X_data + c * x_step;
+ T* y_d = Y_data + c * y_step;
+ for (int64_t ph = 0; ph < pooled_height; ++ph) {
+ int64_t hstart = ph * stride_h - pads[0];  // may be negative when the window starts inside the padding
+ int64_t hend = hstart + kernel_shape[0] * dilation_h;  // exclusive end of the dilated window
+ y_d[ph] = 0;  // the output element doubles as the accumulator
+ for (int64_t h = hstart; h < hend; h += dilation_h) {
+ if (math::is_a_ge_zero_and_a_lt_b(h, height)) {  // skip taps that fall outside the input (per the helper's name)
+ y_d[ph] += static_cast(std::pow(std::abs(x_d[h]), p));
+ }
+ }
+ y_d[ph] = static_cast(std::pow(y_d[ph], 1.0f / p));  // p-th root of the sum; NOTE(review): p == 0 is not guarded in this functor
+ }
+ }
+};
+
+template
+struct LpPool2DTask final {  // Per-plane functor for 2-D Lp pooling: y = (sum of |x|^p over the window)^(1/p).
+ const T* X_data;  // input base pointer; plane c starts at X_data + c * x_step
+ T* Y_data;  // output base pointer; plane c starts at Y_data + c * y_step
+ int64_t x_step;
+ int64_t y_step;
+ int64_t dilation_h;
+ int64_t dilation_w;
+ int64_t pooled_height;  // output rows per plane
+ int64_t pooled_width;  // output columns per plane
+ int64_t stride_h;
+ int64_t stride_w;
+ int64_t height;  // input rows per plane
+ int64_t width;  // input columns per plane
+ gsl::span kernel_shape;
+ gsl::span pads;  // only pads[0] (rows) and pads[1] (columns) are read by this functor
+ int64_t p;  // exponent of the norm
+
+ TensorOpCost Cost() {  // per-plane cost estimate: one unit per (output element, kernel tap) pair
+ double loop_count = static_cast(pooled_height * pooled_width * kernel_shape[0] * kernel_shape[1]);
+ return TensorOpCost{loop_count, loop_count, loop_count};  // the same count is used for all three fields
+ }
+
+ void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {  // processes planes in [begin, end)
+ for (std::ptrdiff_t c = begin; c < end; ++c) {
+ operator()(c);
+ }
+ }
+
+ void operator()(std::ptrdiff_t c) const {  // processes the single plane with index c
+ const T* x_d = X_data + c * x_step;
+ T* y_d = Y_data + c * y_step;
+ for (int64_t ph = 0; ph < pooled_height; ++ph) {
+ int64_t hstart = ph * stride_h - pads[0];  // may be negative when the window starts inside the padding
+ int64_t hend = hstart + kernel_shape[0] * dilation_h;  // exclusive end of the dilated window
+ for (int64_t pw = 0; pw < pooled_width; ++pw) {
+ int64_t wstart = pw * stride_w - pads[1];
+ int64_t wend = wstart + kernel_shape[1] * dilation_w;
+ const int64_t pool_index = ph * pooled_width + pw;  // row-major offset into the output plane
+ y_d[pool_index] = 0;  // the output element doubles as the accumulator
+ for (int64_t h = hstart; h < hend; h += dilation_h) {
+ if (math::is_a_ge_zero_and_a_lt_b(h, height)) {  // skip rows outside the input (per the helper's name)
+ for (int64_t w = wstart; w < wend; w += dilation_w) {
+ if (math::is_a_ge_zero_and_a_lt_b(w, width)) {  // skip columns outside the input
+ const int64_t input_index = h * width + w;  // row-major offset into the input plane
+ y_d[pool_index] += static_cast(std::pow(std::abs(x_d[input_index]), p));
+ }
+ }
+ }
+ }
+ y_d[pool_index] = static_cast(std::pow(y_d[pool_index], 1.0f / p));  // p-th root of the sum; NOTE(review): p == 0 is not guarded in this functor
+ }
+ }
+ }
+};
+
+template
+struct LpPool3DTask {  // Per-plane functor for 3-D Lp pooling: y = (sum of |x|^p over the window)^(1/p).
+ const T* X_data;  // input base pointer; plane c starts at X_data + c * x_step
+ T* Y_data;  // output base pointer; plane c starts at Y_data + c * y_step
+ int64_t x_step;
+ int64_t y_step;
+ int64_t dilation_h;
+ int64_t dilation_w;
+ int64_t dilation_d;
+ int64_t pooled_height;  // output extent along h
+ int64_t pooled_width;  // output extent along w
+ int64_t pooled_depth;  // output extent along d
+ int64_t stride_h;
+ int64_t stride_w;
+ int64_t stride_d;
+ int64_t height;  // input extent along h
+ int64_t width;  // input extent along w
+ int64_t depth;  // input extent along d (fastest-varying index in this functor)
+ gsl::span kernel_shape;
+ gsl::span pads;  // only pads[0], pads[1] and pads[2] are read by this functor
+ int64_t p;  // exponent of the norm
+
+ void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {  // processes planes in [begin, end)
+ for (std::ptrdiff_t c = begin; c < end; ++c) {
+ operator()(c);
+ }
+ }
+
+ TensorOpCost Cost() {  // per-plane cost estimate: one unit per (output element, kernel tap) pair
+ double loop_count = static_cast(pooled_height * pooled_width * pooled_depth * kernel_shape[0] *
+ kernel_shape[1] * kernel_shape[2]);
+ return TensorOpCost{loop_count, loop_count, loop_count};  // the same count is used for all three fields
+ }
+
+ void operator()(std::ptrdiff_t c) const {  // processes the single plane with index c
+ const T* x_d = X_data + c * x_step;
+ T* y_d = Y_data + c * y_step;
+
+ for (int64_t ph = 0; ph < pooled_height; ++ph) {
+ int64_t hstart = ph * stride_h - pads[0];  // may be negative when the window starts inside the padding
+ int64_t hend = hstart + kernel_shape[0] * dilation_h;  // exclusive end of the dilated window
+ for (int64_t pw = 0; pw < pooled_width; ++pw) {
+ int64_t wstart = pw * stride_w - pads[1];
+ int64_t wend = wstart + kernel_shape[1] * dilation_w;
+ for (int64_t pd = 0; pd < pooled_depth; ++pd) {
+ int64_t dstart = pd * stride_d - pads[2];
+ int64_t dend = dstart + kernel_shape[2] * dilation_d;
+ const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;  // flattened (h, w, d) output offset
+ y_d[pool_index] = 0;  // the output element doubles as the accumulator
+ for (int64_t h = hstart; h < hend; h += dilation_h) {
+ if (math::is_a_ge_zero_and_a_lt_b(h, height)) {  // skip taps outside the input along h (per the helper's name)
+ for (int64_t w = wstart; w < wend; w += dilation_w) {
+ if (math::is_a_ge_zero_and_a_lt_b(w, width)) {  // skip taps outside the input along w
+ for (int64_t d = dstart; d < dend; d += dilation_d) {
+ if (math::is_a_ge_zero_and_a_lt_b(d, depth)) {  // skip taps outside the input along d
+ const int64_t input_index = h * width * depth + w * depth + d;  // flattened (h, w, d) input offset
+ y_d[pool_index] += static_cast(std::pow(std::abs(x_d[input_index]), p));
+ }
+ }
+ }
+ }
+ }
+ }
+ y_d[pool_index] = static_cast(std::pow(y_d[pool_index], 1.0f / p));  // p-th root of the sum; NOTE(review): p == 0 is not guarded in this functor
+ }
+ }
+ }
+ }
+};
+
} // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/nn/lp_pool_test_generator.py b/onnxruntime/test/providers/cpu/nn/lp_pool_test_generator.py
new file mode 100644
index 0000000000..e068784557
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/nn/lp_pool_test_generator.py
@@ -0,0 +1,62 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import numpy as np
+import torch
+from torch import nn
+
+
+# Run this script to print the reference outputs used as expected values in PoolTest.LpPool1d and PoolTest.LpPool2d.
+def generate_lppool_1d_test_cases() -> None:  # prints torch LPPool1d results for every kernel-size/stride pair
+ p = 2  # norm exponent passed to LPPool1d
+ x = np.array(
+ [
+ [
+ [1, 2, 3, 4],
+ ]
+ ]
+ ).astype(np.float32)  # float32 array of shape (1, 1, 4)
+
+ print(x)
+ kernel_sizes = [2, 3]
+ strides = [[1], [2]]
+ for kernel_size in kernel_sizes:
+ for stride in strides:
+ print(kernel_size)
+ print(stride)
+ model = nn.LPPool1d(p, kernel_size=kernel_size, stride=stride)
+ pt_y = model(torch.from_numpy(x))
+ print(torch.flatten(pt_y))  # flattened output values for this pair
+ print(pt_y.shape)  # output shape for this pair
+
+
+def generate_lppool_2d_test_cases() -> None:  # prints torch LPPool2d results for every kernel-size/stride pair
+ p = 2  # norm exponent passed to LPPool2d
+ x = np.array(
+ [
+ [
+ [
+ [1, 2, 3, 4],
+ [5, 6, 7, 8],
+ [9, 10, 11, 12],
+ [13, 14, 15, 16],
+ ]
+ ]
+ ]
+ ).astype(np.float32)  # float32 array of shape (1, 1, 4, 4)
+
+ print(x)
+ kernel_sizes = [[2, 2], [3, 3]]
+ strides = [[1, 1], [2, 2]]
+ for kernel_size in kernel_sizes:
+ for stride in strides:
+ model = nn.LPPool2d(p, kernel_size=kernel_size, stride=stride)
+ pt_y = model(torch.from_numpy(x))
+ print(kernel_size)
+ print(stride)
+ print(torch.flatten(pt_y))  # flattened output values for this pair
+ print(pt_y.shape)  # output shape for this pair
+
+
+generate_lppool_1d_test_cases()  # NOTE(review): runs whenever the module is executed or imported; there is no __main__ guard
+generate_lppool_2d_test_cases()
diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
index c68d9839d2..44f81df407 100644
--- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
@@ -1331,6 +1331,88 @@ TEST(PoolTest, LpPool) {
test.Run();
}
+// Runs opset-18 LpPool with p = 2 on a 1-D input for every kernel-size/stride pair; expected values come from lp_pool_test_generator.py
+TEST(PoolTest, LpPool1d) {
+ std::vector kernel_sizes[2] = {{2}, {3}};
+ std::vector strides[2] = {{1}, {2}};
+ std::vector ys[4] = {  // ordered to match the loops below: kernel size outer, stride inner
+ {2.2361f, 3.6056f, 5.0000f},
+ {2.2361f, 5.0000f},
+ {3.7417f, 5.3852f},
+ {3.7417f}};
+ std::vector y_sizes[4] = {  // output shapes, in the same order as ys
+ {1, 1, 3},
+ {1, 1, 2},
+ {1, 1, 2},
+ {1, 1, 1},
+ };
+ int y_count = 0;  // index into ys / y_sizes, advanced once per combination
+ for (int kernel_size_count = 0; kernel_size_count < 2; kernel_size_count++)
+ for (int stride_count = 0; stride_count < 2; stride_count++) {
+ OpTester test("LpPool", 18);
+ test.AddAttribute("auto_pad", "");
+ test.AddAttribute("p", static_cast(2));
+ test.AddInput("X", {1, 1, 4}, {1, 2, 3, 4});
+ test.AddAttribute("strides", strides[stride_count]);
+ test.AddAttribute("kernel_shape", kernel_sizes[kernel_size_count]);
+
+ test.AddOutput("Y", y_sizes[y_count], ys[y_count]);
+
+ // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_network_definition.html#a94f434942252e6d98ac17705c06ce060
+ // TensorRT does not support 1d pooling
+ test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+ y_count++;
+ }
+}
+
+// Runs opset-18 LpPool with p = 2 on a 4x4 input for every kernel-size/stride pair; expected values come from lp_pool_test_generator.py
+TEST(PoolTest, LpPool2d) {
+ std::vector kernel_sizes[2] = {{2, 2}, {3, 3}};
+ std::vector strides[2] = {{1, 1}, {2, 2}};
+ std::vector ys[4] = {  // ordered to match the loops below: kernel size outer, stride inner
+ {8.1240f, 9.8995f, 11.7473f, 15.5563f, 17.4929f, 19.4422f, 23.3666f, 25.3377f, 27.3130f},
+ {8.1240f, 11.7473f, 23.3666f, 27.3130f},
+ {20.6398f, 23.3024f, 31.6544f, 34.5109f},
+ {20.6398f}};
+ std::vector y_sizes[4] = {  // output shapes, in the same order as ys
+ {1, 1, 3, 3},
+ {1, 1, 2, 2},
+ {1, 1, 2, 2},
+ {1, 1, 1, 1},
+ };
+ int y_count = 0;  // index into ys / y_sizes, advanced once per combination
+ for (int kernel_size_count = 0; kernel_size_count < 2; kernel_size_count++)
+ for (int stride_count = 0; stride_count < 2; stride_count++) {
+ OpTester test("LpPool", 18);
+ test.AddAttribute("auto_pad", "");
+ test.AddAttribute("p", static_cast(2));
+ test.AddInput("X", {1, 1, 4, 4},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+ test.AddAttribute("strides", strides[stride_count]);
+ test.AddAttribute("kernel_shape", kernel_sizes[kernel_size_count]);
+
+ test.AddOutput("Y", y_sizes[y_count], ys[y_count]);
+ test.Run();
+ y_count++;
+ }
+}
+
+TEST(PoolTest, LpPoolCeilMode) {  // opset-18 LpPool with ceil_mode = 1 and p = 1 on a 1-D input
+ OpTester test("LpPool", 18);
+
+ test.AddAttribute("auto_pad", "");
+ test.AddAttribute("strides", std::vector{2});
+ test.AddAttribute("kernel_shape", vector{3});  // NOTE(review): unqualified `vector` here, `std::vector` on the previous line
+ test.AddAttribute("ceil_mode", static_cast(1));
+ test.AddAttribute("p", static_cast(1));  // with p = 1 each output is the sum of absolute values in its window
+ test.AddInput("X", {1, 1, 4}, {1, 2, 3, 4});
+ test.AddOutput("Y", {1, 1, 2}, {6, 7});  // windows {1,2,3} and {3,4}; the second runs past the end of the input
+
+ // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_network_definition.html#a94f434942252e6d98ac17705c06ce060
+ // TensorRT does not support 1d pooling
+ test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+}
+
TEST(PoolTest, GlobalLpPool) {
OpTester test("GlobalLpPool");
test.AddAttribute("p", static_cast(3));