Add NHWC CONV contrib op (#10506)

This commit is contained in:
Changming Sun 2022-02-10 15:47:49 -08:00 committed by GitHub
parent eba730500f
commit 3185680b6c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 168 additions and 1 deletions

View file

@ -12,3 +12,8 @@ samples/python/training/** @thiagocrepaldi @tlh20 @liqunfu @baijumeswani @Sherlo
# Mobile
/onnxruntime/test/testdata/kernel_def_hashes/ @skottmckay @gwang-msft @YUNQIUGUO @edgchen1
# Contrib Ops
onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn
onnxruntime/core/graph/contrib_ops/nchwc_schema_defs.cc @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn
onnxruntime/core/graph/contrib_ops/quantization_defs.* @zhanghuanrong @chenfucn @yufenglee @yihonglyu @snnn

View file

@ -37,6 +37,7 @@ Do not modify directly.*
* <a href="#com.microsoft.MulInteger">com.microsoft.MulInteger</a>
* <a href="#com.microsoft.MurmurHash3">com.microsoft.MurmurHash3</a>
* <a href="#com.microsoft.NGramRepeatBlock">com.microsoft.NGramRepeatBlock</a>
* <a href="#com.microsoft.NhwcConv">com.microsoft.NhwcConv</a>
* <a href="#com.microsoft.NhwcMaxPool">com.microsoft.NhwcMaxPool</a>
* <a href="#com.microsoft.Pad">com.microsoft.Pad</a>
* <a href="#com.microsoft.QAttention">com.microsoft.QAttention</a>
@ -1823,6 +1824,55 @@ This version of the operator has been available since version 1 of the 'com.micr
</dl>
### <a name="com.microsoft.NhwcConv"></a><a name="com.microsoft.nhwcconv">**com.microsoft.NhwcConv**</a>
#### Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
#### Attributes
<dl>
<dt><tt>auto_pad</tt> : string</dt>
<dd></dd>
<dt><tt>dilations</tt> : list of ints</dt>
<dd>dilation value along each spatial axis of the filter. If not present, the dilation defaults is 1 along each spatial axis.</dd>
<dt><tt>group</tt> : int</dt>
<dd>number of groups input channels and output channels are divided into.</dd>
<dt><tt>kernel_shape</tt> : list of ints</dt>
<dd>The shape of the convolution kernel. If not present, should be inferred from input W.</dd>
<dt><tt>pads</tt> : list of ints</dt>
<dd></dd>
<dt><tt>strides</tt> : list of ints</dt>
<dd>Stride along each spatial axis. If not present, the stride defaults is 1 along each spatial axis.</dd>
</dl>
#### Inputs (2 - 3)
<dl>
<dt><tt>X</tt> : T</dt>
<dd>Input data tensor from previous layer; has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the 2D image. Otherwise the size is (N x C x D1 x D2 ... x Dn). Optionally, if dimension denotation is in effect, the operation expects input data tensor to arrive with the dimension denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].</dd>
<dt><tt>W</tt> : T</dt>
<dd>The weight tensor that will be used in the convolutions; has size (M x C/group x kH x kW), where C is the number of channels, and kH and kW are the height and width of the kernel, and M is the number of feature maps. For more than 2 dimensions, the kernel shape will be (M x C/group x k1 x k2 x ... x kn), where (k1 x k2 x ... kn) is the dimension of the kernel. Optionally, if dimension denotation is in effect, the operation expects the weight tensor to arrive with the dimension denotation of [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. Assuming zero based indices for the shape array, X.shape[1] == (W.shape[1] * group) == C and W.shape[0] mod G == 0. Or in other words FILTER_IN_CHANNEL multiplied by the number of groups should be equal to DATA_CHANNEL and the number of feature maps M should be a multiple of the number of groups G.</dd>
<dt><tt>B</tt> (optional) : T</dt>
<dd>Optional 1D bias to be added to the convolution, has size of M.</dd>
</dl>
#### Outputs
<dl>
<dt><tt>Y</tt> : T</dt>
<dd>Output data tensor that contains the result of the convolution. The output dimensions are functions of the kernel size, stride size, and pad lengths.</dd>
</dl>
#### Type Constraints
<dl>
<dt><tt>T</tt> : tensor(float16), tensor(float), tensor(double)</dt>
<dd>Constrain input and output types to float tensors.</dd>
</dl>
### <a name="com.microsoft.NhwcMaxPool"></a><a name="com.microsoft.nhwcmaxpool">**com.microsoft.NhwcMaxPool**</a>
#### Version

View file

@ -12,6 +12,7 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcMaxPool);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearGlobalAveragePool);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAveragePool);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConv);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcConv);
//Quantization ops
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DequantizeLinear);
@ -80,6 +81,7 @@ class OpSet_Microsoft_ver1 {
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearGlobalAveragePool)>());
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAveragePool)>());
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConv)>());
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcConv)>());
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DequantizeLinear)>());
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicQuantizeLSTM)>());

View file

@ -354,5 +354,115 @@ ONNX_MS_OPERATOR_SET_SCHEMA(QLinearConv, 1,
onnxruntime::contrib::convPoolShapeInferenceNhwc(ctx, true, false, 0, 3);
}
}));
std::function<void(OpSchema&)> ConvOpSchemaGenerator() {
return [=](OpSchema& schema) {
schema.Input(
0,
"X",
"Input data tensor from previous layer; "
"has size (N x C x H x W), where N is the batch size, "
"C is the number of channels, and H and W are the "
"height and width. Note that this is for the 2D image. "
"Otherwise the size is (N x C x D1 x D2 ... x Dn). "
"Optionally, if dimension denotation is "
"in effect, the operation expects input data tensor "
"to arrive with the dimension denotation of [DATA_BATCH, "
"DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
"T",
OpSchema::Single,
true,
1,
OpSchema::Differentiable);
schema.Input(
1,
"W",
"The weight tensor that will be used in the "
"convolutions; has size (M x C/group x kH x kW), where C "
"is the number of channels, and kH and kW are the "
"height and width of the kernel, and M is the number "
"of feature maps. For more than 2 dimensions, the "
"kernel shape will be (M x C/group x k1 x k2 x ... x kn), "
"where (k1 x k2 x ... kn) is the dimension of the kernel. "
"Optionally, if dimension denotation is in effect, "
"the operation expects the weight tensor to arrive "
"with the dimension denotation of [FILTER_OUT_CHANNEL, "
"FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. "
"Assuming zero based indices for the shape array, "
"X.shape[1] == (W.shape[1] * group) == C and "
"W.shape[0] mod G == 0. Or in other words "
"FILTER_IN_CHANNEL multiplied by the number of groups "
"should be equal to DATA_CHANNEL and the number of "
"feature maps M should be a multiple of the number of "
"groups G.",
"T",
OpSchema::Single,
true,
1,
OpSchema::Differentiable);
schema.Input(
2,
"B",
"Optional 1D bias to be added to the convolution, has size of M.",
"T",
OpSchema::Optional,
true,
1,
OpSchema::Differentiable);
schema.Output(
0,
"Y",
"Output data tensor that contains the result of the "
"convolution. The output dimensions are functions "
"of the kernel size, stride size, and pad lengths.",
"T",
OpSchema::Single,
true,
1,
OpSchema::Differentiable);
schema.TypeConstraint(
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.");
schema.Attr(
"kernel_shape",
"The shape of the convolution kernel. If not present, should be inferred from input W.",
AttributeProto::INTS,
OPTIONAL_VALUE);
schema.Attr(
"dilations",
"dilation value along each spatial axis of the filter. If not present, the dilation defaults is 1 along each spatial axis.",
AttributeProto::INTS,
OPTIONAL_VALUE);
schema.Attr(
"strides",
"Stride along each spatial axis. If not present, the stride defaults is 1 along each spatial axis.",
AttributeProto::INTS,
OPTIONAL_VALUE);
schema.Attr(
"auto_pad",
"",
AttributeProto::STRING,
std::string("NOTSET"));
schema.Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE);
schema.Attr(
"group",
"number of groups input channels and output channels are divided into.",
AttributeProto::INT,
static_cast<int64_t>(1));
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
NhwcInferenceContext nhwc_ctx(ctx);
nhwc_ctx.TransposeInputShape();
convPoolShapeInference(nhwc_ctx, true, false, 0, 1);
nhwc_ctx.TransposeOutputShape();
});
};
}
ONNX_MS_OPERATOR_SET_SCHEMA(
NhwcConv,
1,
OpSchema().FillUsing(ConvOpSchemaGenerator()));
} // namespace contrib
} // namespace onnxruntime
} // namespace onnxruntime