From 32c05fcdd12636ef06e2ca3875704183e3da293a Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Wed, 25 Jan 2023 15:23:00 -0500 Subject: [PATCH] Add Col2Im CPU op (#12311) **Description** This PR implements N-dimensional Col2Im as a contrib CPU Op as specified by ONNX's https://github.com/onnx/onnx/pull/3948 **Motivation and Context** - Col2Im enables models such as: - [SS-DCNet](https://github.com/xhp-hust-2018-2011/SS-DCNet) - [DSTT](https://github.com/ruiliu-ai/DSTT) - It also serves to document the ORT's obscure `math::Col2ImNd` utility Signed-off-by: Liqun Fu Co-authored-by: Liqun Fu --- docs/OperatorKernels.md | 1 + .../providers/cpu/cpu_execution_provider.cc | 2 + .../core/providers/cpu/tensor/col2im.cc | 113 ++++++++++++ .../core/providers/cpu/tensor/col2im.h | 30 ++++ onnxruntime/test/onnx/main.cc | 1 + .../test/providers/cpu/tensor/col2im_test.cc | 169 ++++++++++++++++++ .../onnx_backend_test_series_filters.jsonc | 2 +- 7 files changed, 317 insertions(+), 1 deletion(-) create mode 100644 onnxruntime/core/providers/cpu/tensor/col2im.cc create mode 100644 onnxruntime/core/providers/cpu/tensor/col2im.h create mode 100644 onnxruntime/test/providers/cpu/tensor/col2im_test.cc diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 27d511c55d..ad571dacb2 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -58,6 +58,7 @@ Do not modify directly.* |||12|**T** = tensor(double), tensor(float), tensor(int64), tensor(int8), tensor(uint64), tensor(uint8)| |||11|**T** = tensor(float)| |||[6, 10]|**T** = tensor(float)| +|Col2Im|*in* input:**T**
*in* image_shape:**tensor(int64)**
*in* block_shape:**tensor(int64)**
*out* output:**T**|18+|**T** = tensor(float)| |Compress|*in* input:**T**
*in* condition:**T1**
*out* output:**T**|11+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| |||[9, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| |Concat|*in* inputs:**T**
*out* concat_result:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 3bcef3d9ff..75060fbf9f 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -830,6 +830,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, BitwiseAnd); @@ -2163,6 +2164,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { ReduceSumSquare)>, BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc new file mode 100644 index 0000000000..b2e7d1c8e0 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/cpu/tensor/col2im.h" +#include "core/util/math.h" +#include "core/util/math_cpuonly.h" + +namespace onnxruntime { + +// math::Col2im and math::Col2imNd only support float data type +ONNX_CPU_OPERATOR_KERNEL( + Col2Im, + 18, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Col2Im); + +template +Status Col2Im::Compute(OpKernelContext* context) const { + const auto* col_tensor = context->Input(0); + const auto* image_shape = context->Input(1); + const auto* kernel_shape = context->Input(2); + + size_t image_dim_number = onnxruntime::narrow(image_shape->Shape().Size()); + TensorShapeVector dilations; + if (dilations_.empty()) { + dilations.resize(image_dim_number, 1); + } else { + ORT_ENFORCE(dilations_.size() == image_dim_number, "size of 'dilations' attribute, if provided, should equal to the number of image dimmensions."); + dilations = dilations_; + } + + TensorShapeVector pads; + if (pads_.empty()) { + pads.resize(image_dim_number * 2, 0); + } else { + ORT_ENFORCE(pads_.size() == 2 * image_dim_number, "size of 'pads' attribute, if provided, should equal to twice the number of image dimmensions."); + pads = pads_; + } + + TensorShapeVector strides; + if (strides_.empty()) { + strides.resize(image_dim_number, 1); + } else { + ORT_ENFORCE(strides_.size() == image_dim_number, "size of 'strides' attribute, if provided, should equal to the number of image dimmensions."); + strides = strides_; + } + + int64_t image_shape_size = 1; + int64_t kernel_shape_size = 1; + TensorShapeVector adjusted_kernel_shape_dims; + auto image_dims = image_shape->Data(); + auto kernel_dims = kernel_shape->Data(); + for (size_t i = 0; i < image_dim_number; ++i) { + image_shape_size *= image_dims[i]; + kernel_shape_size *= kernel_dims[i]; + adjusted_kernel_shape_dims.push_back(dilations[i] * (kernel_dims[i] - 1) + 1); + } + TensorShape col_shape = col_tensor->Shape(); + const auto N = col_shape[0]; + const int64_t C = col_shape[1] / kernel_shape_size; + const int64_t col_stride = C * image_shape_size; + TensorShape adjusted_kernel_shape(adjusted_kernel_shape_dims); + const int64_t col_data_stride = col_shape.SizeFromDimension(1); + + TensorShapeVector batched_image_shape_dims, adjusted_image_shape_dims; + batched_image_shape_dims.insert(batched_image_shape_dims.begin(), {N, C}); + for (size_t i = 0; i < image_dim_number; ++i) { + batched_image_shape_dims.push_back(image_dims[i]); + adjusted_image_shape_dims.push_back(image_dims[i] - adjusted_kernel_shape[i] + 1); + } + TensorShape batched_image_shape(batched_image_shape_dims); + T* image_data = context->Output(0, batched_image_shape)->template MutableData(); + + const T* col_data = col_tensor->template Data(); + for (auto image_id = 0; image_id < N; ++image_id) { + if (image_dim_number == 2) { + math::Col2im( + col_data + image_id * col_data_stride, + C, + image_dims[0], + image_dims[1], + kernel_dims[0], + kernel_dims[1], + dilations[0], + dilations[1], + pads[0], + pads[1], + pads[2], + pads[3], + strides[0], + strides[1], + image_data + image_id * col_stride, + &CPUMathUtil::Instance()); + } else { + math::Col2imNd( + col_data + image_id * col_data_stride, + image_dims, + adjusted_image_shape_dims.data(), + kernel_shape_size * C, + image_shape_size * C, + adjusted_kernel_shape.GetDims().data(), + strides.data(), + dilations.data(), + pads.data(), + image_dim_number, + image_data + image_id * col_stride, + &CPUMathUtil::Instance()); + } + } + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h new file mode 100644 index 0000000000..2f2894a7f2 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/framework/op_kernel.h" + +namespace onnxruntime { + +template +class Col2Im final : public OpKernel { + public: + explicit Col2Im(const OpKernelInfo& info) : OpKernel(info) { + if (!info.GetAttrs("strides", strides_).IsOK()) + ORT_ENFORCE(strides_.empty()); + if (!info.GetAttrs("dilations", dilations_).IsOK()) + ORT_ENFORCE(dilations_.empty()); + if (!info.GetAttrs("pads", pads_).IsOK()) + ORT_ENFORCE(pads_.empty()); + } + + Status Compute(OpKernelContext* context) const override; + + private: + TensorShapeVector pads_; + TensorShapeVector dilations_; + TensorShapeVector strides_; +}; + +} // namespace onnxruntime diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index a7350e517a..d4d093d5a1 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -702,6 +702,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); {"test_scatternd_add", "Opset 16 not supported yet."}, {"test_scatternd_multiply", "Opset 16 not supported yet."}, {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, + {"col2im_pads", "onnx 18 test data error."}, #if defined(DISABLE_OPTIONAL_TYPE) {"test_optional_get_element", "Optional type not supported in this build flavor."}, diff --git a/onnxruntime/test/providers/cpu/tensor/col2im_test.cc b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc new file mode 100644 index 0000000000..3a4539024e --- /dev/null +++ b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "core/util/math.h" + +namespace onnxruntime { +namespace test { + +namespace { +template +std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { + size_t input_size = input.size(); + if (input_size == 0) { + throw std::runtime_error("Invalid input"); + } + std::vector trans_vec(input); + + for (size_t n = 0; n < N; ++n) + for (size_t c = 0; c < C; ++c) + for (size_t h = 0; h < H; ++h) + for (size_t w = 0; w < W; ++w) + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = + input[n * (C * H * W) + c * (H * W) + (w + W * h)]; + + return trans_vec; +} + +} // namespace + +TEST(Col2ImOpTest, Simple4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 1, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channelsNonSquare4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(120); + std::vector output(120); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 4, 5); + test.AddInput("input", {2, 15, 4}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images2channelsNonSquareDilationPadStride4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{2, 2}); + test.AddAttribute("pads", std::vector{2, 2, 2, 2}); + + std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., + 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., + 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., + 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., + 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; + std::vector output{2., 0., 6., 0., 10., + 0., 0., 0., 0., 0., + 22., 0., 26., 0., 30., + 0., 0., 0., 0., 0., + 42., 0., 46., 0., 50., + 0., 0., 0., 0., 0., + 62., 0., 66., 0., 70., + 0., 0., 0., 0., 0., + 82., 0., 86., 0., 90., + 0., 0., 0., 0., 0., + 102., 0., 106., 0., 110., + 0., 0., 0., 0., 0., + 122., 0., 126., 0., 130., + 0., 0., 0., 0., 0., + 142., 0., 146., 0., 150., + 0., 0., 0., 0., 0.}; + test.AddInput("input", {2, 4, 16}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 2}); + + test.AddOutput("output", {2, 2, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With3channels4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(75); + std::vector output(75); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 3, 5, 5); + test.AddInput("input", {1, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channels4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(150); + std::vector output(150); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 5, 5); + test.AddInput("input", {2, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, Simple5dNCHWD) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1, 1}); + test.AddAttribute("dilations", std::vector{1, 1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); + test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); + test.AddOutput("output", {1, 1, 1, 5, 5}, output); + test.Run(); +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 6bdfe58cc2..b04c2ca20e 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -102,6 +102,7 @@ "^test_if_opt", "^test_loop16_seq_none", "^test_identity_opt", + "^test_col2im_pads*", // remove this when using ONNX with this: https://github.com/onnx/onnx/pull/4769 // Following tests are for opset 16 ops and are not yet implemented in ORT "^test_roialign_aligned_*", //GPU failures @@ -118,7 +119,6 @@ "^test_roialign_aligned_*", "^test_clip_default_int8_max_expanded_cpu", "^test_clip_default_int8_min_expanded_cpu", - "^test_col2im_*", "^test_softplus_example_expanded_cpu", "^test_softplus_expanded_cpu", "^test_split_*",