Add schema for new Qops (#2611)

* Add schema for new Qops

* adding shape inference + qlinearaveragepool

* plus review comments

* plus review comments

* updates per review comments

* plus review comments
This commit is contained in:
Ashwini Khade 2019-12-23 10:19:28 -08:00 committed by GitHub
parent 71940c0915
commit cd6ff42442
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -162,7 +162,6 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
return;
}
}
} // namespace ONNX_NAMESPACE
namespace onnxruntime {
@ -202,6 +201,126 @@ void NchwcGlobalPoolOpSchemaGenerator(OpSchema& schema) {
});
}
// Validates the element type and, when a shape is available, the shape of a
// scale or zero-point input during type/shape inference.
//
//   ctx                 inference context of the node being checked.
//   index               position of the scale / zero-point input.
//   expectedType        element type the input tensor must have.
//   isScalar            true: the input must have rank 0;
//                       false: the input must have rank 1.
//   expectedTensorSize  rank-1 case only: required size of dimension 0 when
//                       that dimension carries a concrete value.
//
// Nothing is checked when the node has no input at `index`. Every violation is
// reported through fail_type_inference.
void ValidateTypeAndShapeForScaleAndZP(ONNX_NAMESPACE::InferenceContext& ctx, int index, ::google::protobuf::int32 expectedType, bool isScalar, int expectedTensorSize = 0) {
  if (ctx.getNumInputs() > static_cast<size_t>(index)) {
    auto data_type = ctx.getInputType(index);
    if (nullptr == data_type) {
      // Report the missing type on its own: the detailed message in the branch
      // below reads through data_type and must never run with a null pointer.
      fail_type_inference("Input data type does not match the expected data type");
    } else if (data_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
               data_type->tensor_type().elem_type() != expectedType) {
      fail_type_inference(
          "Input data type does not match the expected data type. Current data type is ", data_type->tensor_type().elem_type());
    }
  }

  if (hasInputShape(ctx, index)) {
    // Bind by reference; the shape is only inspected, so no copy is needed.
    const ONNX_NAMESPACE::TensorShapeProto& shape = ctx.getInputType(index)->tensor_type().shape();
    if (isScalar) {
      if (shape.dim_size() != 0) {
        fail_type_inference("Scale and Zero-point must be a scalar");
      }
    } else {
      if (shape.dim_size() != 1) {
        fail_type_inference("Scale and Zero-point must be of rank 1");
      }

      // The element count can only be compared when the dimension is concrete.
      if (shape.dim(0).has_dim_value() && shape.dim(0).dim_value() != expectedTensorSize) {
        fail_type_inference(
            "Scale and Zero-point must be of rank 1 and the number of elements should be equal to the number of rows of the corresponding input.");
      }
    }
  }
}
// Returns a callback that fills in an OpSchema for an element-wise binary
// QLinear math operator.
//   name                     replaces the "{name}" placeholder in the doc text.
//   additionalDocumentation  replaces the "{additionalDocumentation}" placeholder.
// The callback sets the documentation, declares inputs 0-7 (both operands and
// the output, each with scale and optional zero point), output 0, the "T" type
// constraint, and installs the type/shape inference function.
std::function<void(OpSchema&)> QLinearMathDocGenerator(const char* name, const char* additionalDocumentation) {
  return [=](OpSchema& schema) {
    std::string op_doc = R"DOC(
Performs element-wise binary {name} on 8 bit data types (with Numpy-style broadcasting support).
{additionalDocumentation}
)DOC";
    ONNX_NAMESPACE::ReplaceAll(op_doc, "{name}", name);
    ONNX_NAMESPACE::ReplaceAll(op_doc, "{additionalDocumentation}", additionalDocumentation);

    schema.SetDoc(op_doc)
        .Input(0, "A", "First operand.", "T")
        .Input(
            1,
            "A_scale",
            "Input A's scale. It's a scalar, which means a per-tensor/layer quantization.",
            "tensor(float)")
        .Input(
            2,
            "A_zero_point",
            "Input A zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
            "T",
            OpSchema::Optional)
        .Input(3, "B", "Second operand.", "T")
        .Input(
            4,
            "B_scale",
            "Input B's scale. It's a scalar, which means a per-tensor/layer quantization.",
            "tensor(float)")
        .Input(
            5,
            "B_zero_point",
            "Input B zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
            "T",
            OpSchema::Optional)
        .Input(
            6,
            "C_scale",
            "Output scale. It's a scalar, which means a per-tensor/layer quantization.",
            "tensor(float)")
        .Input(
            7,
            "C_zero_point",
            "Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
            "T",
            OpSchema::Optional)
        .Output(0, "C", "Result, has same element type as two inputs", "T")
        .TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"}, "Constrain input and output types to 8 bit signed and unsigned tensors.")
        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
          // The output element type follows operand A.
          propagateElemTypeFromInputToOutput(ctx, 0, 0);

          // Operand A is input 0 and operand B is input 3.
          const auto* lhs_type = ctx.getInputType(0);
          const auto* rhs_type = ctx.getInputType(3);
          const bool operands_are_tensors =
              lhs_type != nullptr && rhs_type != nullptr &&
              lhs_type->value_case() == ONNX_NAMESPACE::TypeProto::kTensorType &&
              rhs_type->value_case() == ONNX_NAMESPACE::TypeProto::kTensorType;
          if (!operands_are_tensors) {
            fail_type_inference("inputs are expected to have tensor type.");
          }

          // Every scale must be a float scalar; every zero point must be a
          // scalar of the matching operand's element type (A's type for C).
          ValidateTypeAndShapeForScaleAndZP(ctx, 1, ONNX_NAMESPACE::TensorProto::FLOAT, true);
          ValidateTypeAndShapeForScaleAndZP(ctx, 2, lhs_type->tensor_type().elem_type(), true);
          ValidateTypeAndShapeForScaleAndZP(ctx, 4, ONNX_NAMESPACE::TensorProto::FLOAT, true);
          ValidateTypeAndShapeForScaleAndZP(ctx, 5, rhs_type->tensor_type().elem_type(), true);
          ValidateTypeAndShapeForScaleAndZP(ctx, 6, ONNX_NAMESPACE::TensorProto::FLOAT, true);
          ValidateTypeAndShapeForScaleAndZP(ctx, 7, lhs_type->tensor_type().elem_type(), true);

          // The output shape can only be derived when both operand shapes are known.
          if (!(hasInputShape(ctx, 0) && hasInputShape(ctx, 3))) {
            return;
          }
          bidirectionalBroadcastShapeInference(
              ctx.getInputType(0)->tensor_type().shape(),
              ctx.getInputType(3)->tensor_type().shape(),
              *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape());
        });
  };
}
// Description text for a `pads` attribute: explains the
// [x1_begin, x2_begin...x1_end, x2_end,...] layout and the default of 0.
// Used as the doc string of the `pads` attribute of the QLinearAveragePool schema.
const char* contrib_ops_pads_doc =
"Padding for the beginning and ending along each spatial axis, it can take any value greater "
"than or equal to 0. The value represent the number of pixels added to the beginning "
"and end part of the corresponding axis. `pads` format should be as follow "
"[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of pixels "
"added at the beginning of axis `i` and xi_end, the number of pixels added at "
"the end of axis `i`. This attribute cannot be used simultaneously with "
"auto_pad attribute. If not present, the padding defaults to 0 along start and end of each spatial axis.";
// Description text for an `auto_pad` attribute (NOTSET, SAME_UPPER, SAME_LOWER
// or VALID). Used as the doc string of the `auto_pad` attribute of the
// QLinearAveragePool schema.
// The pieces are adjacent string literals joined by the compiler, so every
// piece that ends a sentence must carry its own trailing space.
const char* contrib_ops_auto_pad_doc =
    "auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
    "default value is NOTSET, which means explicit padding is used. "
    "SAME_UPPER or SAME_LOWER mean pad the input so that the output spatial size match the input. "
    "In case of odd number add the extra padding at the end for SAME_UPPER and at the "
    "beginning for SAME_LOWER. VALID mean no padding.";
void RegisterNchwcSchemas() {
ONNX_CONTRIB_OPERATOR_SCHEMA(ReorderInput)
.SetDomain(kMSNchwcDomain)
@ -1541,6 +1660,307 @@ with the exception that numpy default keepdims to False instead of True.)DOC")
"Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
AttributeProto::INT);
// Registers the QLinearAdd contrib operator (kMSDomain, since version 1).
// Documentation, inputs, output, type constraint and the inference function
// all come from QLinearMathDocGenerator; the second argument is the formula
// substituted into the generated doc text.
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearAdd)
.SetDomain(kMSDomain)
.SinceVersion(1)
.FillUsing(QLinearMathDocGenerator("addition",
"C = (A_scale * (A - A_zero_point) + B_scale * (B - B_zero_point))/C_scale + C_zero_point"));
// Registers the QLinearMul contrib operator (kMSDomain, since version 1).
// Documentation, inputs, output, type constraint and the inference function
// all come from QLinearMathDocGenerator; the second argument is the formula
// substituted into the generated doc text.
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearMul)
.SetDomain(kMSDomain)
.SinceVersion(1)
.FillUsing(QLinearMathDocGenerator("multiplication",
"C = ((A - A_zero_point) * (B - B_zero_point)) * (A_scale * B_scale)/C_scale + C_zero_point"));
// QLinearReduceMean (kMSDomain, since version 1): mean reduction over a
// quantized 8-bit tensor, re-quantized with the output scale / zero point.
// Input layout: 0 = data, 1 = data_scale, 2 = data_zero_point (optional),
//               3 = reduced_scale, 4 = reduced_zero_point (optional).
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearReduceMean)
    .SetDomain(kMSDomain)
    .SinceVersion(1)
    .SetDoc(R"DOC(
Computes the mean of the low-precision input tensor's element along the provided axes.
The resulting tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0,
then the resulting tensor have the reduced dimension pruned. The above behavior is similar to numpy,
with the exception that numpy default keepdims to False instead of True.
Input and Output scales and zero points are used to requantize the output in a new range.
This helps to improve accuracy as after ReduceMean operation the range of the output is expected to decrease.
```
"Output = Dequantize(Input) -> ReduceMean on fp32 data -> Quantize(output)",
```
)DOC")
    .Input(0, "data", "An input tensor.", "T")
    .Input(
        1,
        "data_scale",
        "Input scale. It's a scalar, which means a per-tensor/layer quantization.",
        "tensor(float)")
    .Input(
        2,
        "data_zero_point",
        "Input zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Input(
        3,
        "reduced_scale",
        "Output scale. It's a scalar, which means a per-tensor/layer quantization.",
        "tensor(float)")
    .Input(
        4,
        "reduced_zero_point",
        "Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Output(0, "reduced", "Reduced output tensor.", "T")
    .TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"}, "Constrain input types to 8 bit signed and unsigned tensors.")
    .Attr(
        "axes",
        "A list of integers, along which to reduce. The default is to reduce over all the dimensions of the input tensor.",
        AttributeProto::INTS)
    .Attr(
        "keepdims",
        "Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
        AttributeProto::INT)
    .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
      // The output element type always mirrors the data input.
      propagateElemTypeFromInputToOutput(ctx, 0, 0);

      // Without the shape of input 0 there is nothing more to infer.
      if (!hasNInputShapes(ctx, 1)) {
        return;
      }

      const auto* input_type = ctx.getInputType(0);
      const bool input_is_tensor =
          input_type != nullptr &&
          input_type->value_case() == ONNX_NAMESPACE::TypeProto::kTensorType;
      if (!input_is_tensor) {
        fail_type_inference("inputs are expected to have tensor type.");
      }

      // Scales must be float scalars; zero points must be scalars of the data element type.
      ValidateTypeAndShapeForScaleAndZP(ctx, 1, ONNX_NAMESPACE::TensorProto::FLOAT, true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 2, input_type->tensor_type().elem_type(), true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 3, ONNX_NAMESPACE::TensorProto::FLOAT, true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 4, input_type->tensor_type().elem_type(), true);

      // keepdims falls back to 1 when the attribute is absent.
      const auto* keepdims_attr = ctx.getAttribute("keepdims");
      const int64_t keep_dims = keepdims_attr ? keepdims_attr->i() : 1;

      auto& in_shape = ctx.getInputType(0)->tensor_type().shape();
      const int64_t rank = in_shape.dim_size();
      auto* out_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();

      std::vector<int64_t> reduce_axes;
      if (const auto* axes_attr = ctx.getAttribute("axes")) {
        reduce_axes.assign(axes_attr->ints().begin(), axes_attr->ints().end());
      }

      // Range-check each axis and fold negative values into [0, rank).
      for (int64_t& axis : reduce_axes) {
        if (axis < -rank || axis >= rank) {
          fail_shape_inference(
              "axis must be in [-rank, rank-1]. input rank was ", rank);
        }
        if (axis < 0) {
          axis += rank;
        }
      }

      // Axes are already non-negative here. An empty axes list reduces every dimension.
      for (int d = 0; d < rank; ++d) {
        const bool is_reduced =
            reduce_axes.empty() ||
            std::find(reduce_axes.begin(), reduce_axes.end(), d) != reduce_axes.end();
        if (!is_reduced) {
          // Untouched dimension: carried over from the input as is.
          out_shape->add_dim()->CopyFrom(in_shape.dim(d));
        } else if (keep_dims == 1) {
          // Reduced dimension kept with extent 1; otherwise it is dropped.
          out_shape->add_dim()->set_dim_value(1);
        }
      }
    });
// MulInteger (kMSDomain, since version 1): element-wise multiplication of two
// 8-bit tensors after subtracting their zero points; the result is int32.
// Input layout: 0 = A, 1 = A_zero_point (optional),
//               2 = B, 3 = B_zero_point (optional).
ONNX_CONTRIB_OPERATOR_SCHEMA(MulInteger)
    .SetDomain(kMSDomain)
    .SinceVersion(1)
    .SetDoc(R"DOC(Performs element-wise binary quantized multiplication (with Numpy-style broadcasting support).
"This operator supports **multidirectional (i.e., Numpy-style) broadcasting**"
The output of this op is the int32 accumulated result of the mul operation
```
C (int32) = (A - A_zero_point) * (B - B_zero_point)
```
)DOC")
    .Input(0, "A", "First operand.", "T")
    .Input(
        1,
        "A_zero_point",
        "Input A zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Input(2, "B", "Second operand.", "T")
    .Input(
        3,
        "B_zero_point",
        "Input B zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Output(0, "C", "Constrain output to 32 bit tensor", "T1")
    .TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"}, "Constrain input types to 8 bit signed and unsigned tensors.")
    .TypeConstraint("T1", {"tensor(int32)"}, "Constrain output types to 32 bit tensors.")
    .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
      // The output element type is int32 regardless of the input types.
      auto c_type = ctx.getOutputType(0);
      c_type->mutable_tensor_type()->set_elem_type(
          ONNX_NAMESPACE::TensorProto::INT32);

      // Operand B is input 2. Input 3 is the optional B_zero_point, so reading
      // the operand type from it would reject nodes that omit the zero point
      // and would make the zero-point type check below compare it with itself.
      auto a_type = ctx.getInputType(0);
      auto b_type = ctx.getInputType(2);
      if (nullptr == a_type || nullptr == b_type ||
          a_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
          b_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
        fail_type_inference("inputs are expected to have tensor type.");
      }

      // Each zero point must be a scalar with its operand's element type.
      ValidateTypeAndShapeForScaleAndZP(ctx, 1, a_type->tensor_type().elem_type(), true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 3, b_type->tensor_type().elem_type(), true);

      // The output shape is the broadcast of both operand shapes, when known.
      if (hasInputShape(ctx, 0) && hasInputShape(ctx, 2)) {
        bidirectionalBroadcastShapeInference(
            ctx.getInputType(0)->tensor_type().shape(),
            ctx.getInputType(2)->tensor_type().shape(),
            *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape());
      }
    });
// Documentation text for the QLinearAveragePool schema; passed to SetDoc in the
// registration that follows. Describes the output spatial shape formulas
// (floor / ceil, and the deprecated auto_pad modes) and the
// dequantize -> average pool -> quantize flow.
const char* QLinearAveragePoolDoc_ver1 = R"DOC(
QLinearAveragePool consumes an input tensor X and applies average pooling across
the tensor according to kernel sizes, stride sizes, and pad lengths.
average pooling consisting of computing the average on all values of a
subset of the input tensor according to the kernel size and downsampling the
data into the output tensor Y for further processing. The output spatial shape will be following:
```
output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
```
or
```
output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
```
if ceil_mode is enabled
```
* pad_shape[i] is sum of pads along axis i
```
`auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:
```
VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])
SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
```
And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:
```
pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]
```
The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero).
Input and output scales and zero points are used to convert the output to a new quantization range.
Output = Dequantize(Input) -> AveragePool on fp32 data -> Quantize(output)
)DOC";
// QLinearAveragePool (kMSDomain, since version 1): average pooling over a
// quantized 8-bit tensor, re-quantized with the output scale / zero point.
// Input layout: 0 = X, 1 = x_scale, 2 = x_zero_point (optional),
//               3 = y_scale, 4 = y_zero_point (optional).
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearAveragePool)
    .SetDomain(kMSDomain)
    .SinceVersion(1)
    .SetDoc(QLinearAveragePoolDoc_ver1)
    .Attr(
        "count_include_pad",
        "Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include pad.",
        AttributeProto::INT,
        static_cast<int64_t>(0))
    .Attr(
        "kernel_shape",
        "The size of the kernel along each axis.",
        AttributeProto::INTS)
    .Attr(
        "strides",
        "Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.",
        AttributeProto::INTS,
        OPTIONAL)
    .Attr(
        "auto_pad",
        contrib_ops_auto_pad_doc,
        AttributeProto::STRING,
        std::string("NOTSET"))
    .Attr("pads", contrib_ops_pads_doc, AttributeProto::INTS, OPTIONAL)
    .Attr(
        "ceil_mode",
        "Whether to use ceil or floor (default) to compute the output shape.",
        AttributeProto::INT,
        static_cast<int64_t>(0))
    .Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the batch "
        "size. Optionally, if dimension denotation is "
        "in effect, the operation expects the input "
        "data tensor to arrive with the dimension denotation "
        "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
        "T")
    .Input(
        1,
        "x_scale",
        "Input scale. It's a scalar, which means a per-tensor/layer quantization.",
        "tensor(float)")
    .Input(
        2,
        "x_zero_point",
        "Input zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Input(
        3,
        "y_scale",
        "Output scale. It's a scalar, which means a per-tensor/layer quantization.",
        "tensor(float)")
    .Input(
        4,
        "y_zero_point",
        "Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
        "T",
        OpSchema::Optional)
    .Output(
        0,
        "Y",
        "Output data tensor from average or max pooling across "
        "the input tensor. Dimensions will vary based "
        "on various kernel, stride, and pad sizes. Floor value of "
        "the dimension is used",
        "T")
    .TypeConstraint(
        "T",
        {"tensor(uint8)", "tensor(int8)"},
        "Constrain input and output types to 8 bit tensors.")
    .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
      // The output element type always mirrors X.
      ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);

      const auto* x_type = ctx.getInputType(0);
      const bool x_is_tensor =
          x_type != nullptr &&
          x_type->value_case() == ONNX_NAMESPACE::TypeProto::kTensorType;
      if (!x_is_tensor) {
        fail_type_inference("inputs are expected to have tensor type.");
      }

      // Scales must be float scalars; zero points must be scalars of X's element type.
      ValidateTypeAndShapeForScaleAndZP(ctx, 1, ONNX_NAMESPACE::TensorProto::FLOAT, true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 2, x_type->tensor_type().elem_type(), true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 3, ONNX_NAMESPACE::TensorProto::FLOAT, true);
      ValidateTypeAndShapeForScaleAndZP(ctx, 4, x_type->tensor_type().elem_type(), true);

      // Output shape computation is delegated to the shared conv/pool helper.
      ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
    });
ONNX_CONTRIB_OPERATOR_SCHEMA(MurmurHash3)
.SetDomain(kMSDomain)
.SinceVersion(1)