From 3185680b6c8f5be6aafd2001f2d0ce74299a0569 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 10 Feb 2022 15:47:49 -0800 Subject: [PATCH] Add NHWC CONV contrib op (#10506) --- CODEOWNERS | 5 + docs/ContribOperators.md | 50 ++++++++ onnxruntime/core/graph/contrib_ops/ms_opset.h | 2 + .../graph/contrib_ops/nhwc_schema_defs.cc | 112 +++++++++++++++++- 4 files changed, 168 insertions(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 59e103be91..2274b0a848 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -12,3 +12,8 @@ samples/python/training/** @thiagocrepaldi @tlh20 @liqunfu @baijumeswani @Sherlo # Mobile /onnxruntime/test/testdata/kernel_def_hashes/ @skottmckay @gwang-msft @YUNQIUGUO @edgchen1 + +# Contrib Ops +onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn +onnxruntime/core/graph/contrib_ops/nchwc_schema_defs.cc @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn +onnxruntime/core/graph/contrib_ops/quantization_defs.* @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn \ No newline at end of file diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index efbdbb52a2..d9092cd185 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -37,6 +37,7 @@ Do not modify directly.* * com.microsoft.MulInteger * com.microsoft.MurmurHash3 * com.microsoft.NGramRepeatBlock + * com.microsoft.NhwcConv * com.microsoft.NhwcMaxPool * com.microsoft.Pad * com.microsoft.QAttention @@ -1823,6 +1824,55 @@ This version of the operator has been available since version 1 of the 'com.micr +### **com.microsoft.NhwcConv** + +#### Version + +This version of the operator has been available since version 1 of the 'com.microsoft' operator set. + +#### Attributes + +
+
auto_pad : string
+
+
dilations : list of ints
+
dilation value along each spatial axis of the filter. If not present, the dilation defaults is 1 along each spatial axis.
+
group : int
+
number of groups input channels and output channels are divided into.
+
kernel_shape : list of ints
+
The shape of the convolution kernel. If not present, should be inferred from input W.
+
pads : list of ints
+
+
strides : list of ints
+
Stride along each spatial axis. If not present, the stride defaults is 1 along each spatial axis.
+
+ +#### Inputs (2 - 3) + +
+
X : T
+
Input data tensor from previous layer; has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the 2D image. Otherwise the size is (N x C x D1 x D2 ... x Dn). Optionally, if dimension denotation is in effect, the operation expects input data tensor to arrive with the dimension denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].
+
W : T
+
The weight tensor that will be used in the convolutions; has size (M x C/group x kH x kW), where C is the number of channels, and kH and kW are the height and width of the kernel, and M is the number of feature maps. For more than 2 dimensions, the kernel shape will be (M x C/group x k1 x k2 x ... x kn), where (k1 x k2 x ... kn) is the dimension of the kernel. Optionally, if dimension denotation is in effect, the operation expects the weight tensor to arrive with the dimension denotation of [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. Assuming zero based indices for the shape array, X.shape[1] == (W.shape[1] * group) == C and W.shape[0] mod G == 0. Or in other words FILTER_IN_CHANNEL multiplied by the number of groups should be equal to DATA_CHANNEL and the number of feature maps M should be a multiple of the number of groups G.
+
B (optional) : T
+
Optional 1D bias to be added to the convolution, has size of M.
+
+ +#### Outputs + +
+
Y : T
+
Output data tensor that contains the result of the convolution. The output dimensions are functions of the kernel size, stride size, and pad lengths.
+
+ +#### Type Constraints + +
+
T : tensor(float16), tensor(float), tensor(double)
+
Constrain input and output types to float tensors.
+
+ + + ### **com.microsoft.NhwcMaxPool** #### Version diff --git a/onnxruntime/core/graph/contrib_ops/ms_opset.h b/onnxruntime/core/graph/contrib_ops/ms_opset.h index 1b45809d19..b82a3b5292 100644 --- a/onnxruntime/core/graph/contrib_ops/ms_opset.h +++ b/onnxruntime/core/graph/contrib_ops/ms_opset.h @@ -12,6 +12,7 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcMaxPool); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearGlobalAveragePool); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAveragePool); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConv); +class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcConv); //Quantization ops class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DequantizeLinear); @@ -80,6 +81,7 @@ class OpSet_Microsoft_ver1 { fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); + fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); diff --git a/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc b/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc index a5a76364bc..0b910440c6 100644 --- a/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc @@ -354,5 +354,115 @@ ONNX_MS_OPERATOR_SET_SCHEMA(QLinearConv, 1, onnxruntime::contrib::convPoolShapeInferenceNhwc(ctx, true, false, 0, 3); } })); +/* ConvOpSchemaGenerator: returns a callable that fills the OpSchema it is given with the definition registered below as NhwcConv -- inputs X, W and optional B, output Y, type constraint T (float16, float, double), attributes kernel_shape, dilations, strides, auto_pad (default NOTSET), pads and group (default 1), plus a type-and-shape inference function. The [=] lambda references no locals of the enclosing function. NOTE(review): the X and W descriptions state an (N x C x H x W) layout although the op is registered as NhwcConv -- confirm the intended wording. NOTE(review): the template arguments of std::function and static_cast are not visible in this copy of the patch -- confirm against the upstream file. */std::function ConvOpSchemaGenerator() { + return [=](OpSchema& schema) { + schema.Input( + 0, + "X", + "Input data tensor from previous layer; " + "has size (N x C x H x W), where N is the batch size, " + "C is the number of channels, and H and W are the " + "height and width. Note that this is for the 2D image. " + "Otherwise the size is (N x C x D1 x D2 ... x Dn).
" + "Optionally, if dimension denotation is " + "in effect, the operation expects input data tensor " + "to arrive with the dimension denotation of [DATA_BATCH, " + "DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable); + schema.Input( + 1, + "W", + "The weight tensor that will be used in the " + "convolutions; has size (M x C/group x kH x kW), where C " + "is the number of channels, and kH and kW are the " + "height and width of the kernel, and M is the number " + "of feature maps. For more than 2 dimensions, the " + "kernel shape will be (M x C/group x k1 x k2 x ... x kn), " + "where (k1 x k2 x ... kn) is the dimension of the kernel. " + "Optionally, if dimension denotation is in effect, " + "the operation expects the weight tensor to arrive " + "with the dimension denotation of [FILTER_OUT_CHANNEL, " + "FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. " + "Assuming zero based indices for the shape array, " + "X.shape[1] == (W.shape[1] * group) == C and " + "W.shape[0] mod G == 0. Or in other words " + "FILTER_IN_CHANNEL multiplied by the number of groups " + "should be equal to DATA_CHANNEL and the number of " + "feature maps M should be a multiple of the number of " + "groups G.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable); + schema.Input( + 2, + "B", + "Optional 1D bias to be added to the convolution, has size of M.", + "T", + OpSchema::Optional, + true, + 1, + OpSchema::Differentiable); + schema.Output( + 0, + "Y", + "Output data tensor that contains the result of the " + "convolution. The output dimensions are functions " + "of the kernel size, stride size, and pad lengths.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable); + schema.TypeConstraint( + "T", + {"tensor(float16)", "tensor(float)", "tensor(double)"}, + "Constrain input and output types to float tensors."); + schema.Attr( + "kernel_shape", + "The shape of the convolution kernel.
If not present, should be inferred from input W.", + AttributeProto::INTS, + OPTIONAL_VALUE); + schema.Attr( + "dilations", + "dilation value along each spatial axis of the filter. If not present, the dilation defaults is 1 along each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE); + schema.Attr( + "strides", + "Stride along each spatial axis. If not present, the stride defaults is 1 along each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE); + schema.Attr( + "auto_pad", + "", + AttributeProto::STRING, + std::string("NOTSET")); + schema.Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE); + schema.Attr( + "group", + "number of groups input channels and output channels are divided into.", + AttributeProto::INT, + static_cast(1)); + /* Inference: propagates the element type from input 0 to output 0, then runs the shared convPoolShapeInference helper on an NhwcInferenceContext wrapper, calling TransposeInputShape before it and TransposeOutputShape after it. NOTE(review): presumably the wrapper presents NHWC shapes in the NCHW form the shared helper expects and converts the result back -- confirm in NhwcInferenceContext. */ schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { + propagateElemTypeFromInputToOutput(ctx, 0, 0); + NhwcInferenceContext nhwc_ctx(ctx); + nhwc_ctx.TransposeInputShape(); + convPoolShapeInference(nhwc_ctx, true, false, 0, 1); + nhwc_ctx.TransposeOutputShape(); + }); + }; +} + +ONNX_MS_OPERATOR_SET_SCHEMA( + NhwcConv, + 1, + OpSchema().FillUsing(ConvOpSchemaGenerator())); } // namespace contrib -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime +