From a82a0907e0afda1b0bd623792699e2a7cd0dc7eb Mon Sep 17 00:00:00 2001
From: linkerzhang
Date: Fri, 23 Nov 2018 11:56:14 -0800
Subject: [PATCH] add ops for quantization support.

---
 onnxruntime/contrib_ops/contrib_ops.cc | 229 +++++++++++++++++++++++++
 1 file changed, 229 insertions(+)

diff --git a/onnxruntime/contrib_ops/contrib_ops.cc b/onnxruntime/contrib_ops/contrib_ops.cc
index 49c8a133ea..d698cdcbd3 100644
--- a/onnxruntime/contrib_ops/contrib_ops.cc
+++ b/onnxruntime/contrib_ops/contrib_ops.cc
@@ -104,6 +104,235 @@ The quantization formula is y = (x / y_scale) + y_zero_point. For (x / y_scale),
 The linear de-quantization operator. It consumes a quantized data, a scale, a zero point and computes the full precision data.
 The dequantization formula is y = (x - x_zero_point) * x_scale.
 Scale and zero point must have same shape. They must be either scalar (per tensor) or 1-D tensor (per 'axis').)DOC");
+
+  // Description of the `auto_pad` attribute, shared by the QLinearConv and ConvInteger schemas below.
+  const char* auto_pad_doc =
+      "auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
+      "default value is NOTSET, which means explicit padding is used. "
+      "SAME_UPPER or SAME_LOWER mean pad the input so that the output size match the input. "
+      "In case of odd number add the extra padding at the end for SAME_UPPER and at the "
+      "beginning for SAME_LOWER. VALID mean no padding.";
+
+  // QLinearConv: convolution over quantized tensors; x, w and y each come with a scale and a zero point.
+  ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearConv)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(R"DOC(
+The convolution operator consumes a quantized input tensor, its scale and zero point,
+a quantized filter, its scale and zero point, and output's scale and zero point,
+and computes the quantized output. Each scale and zero point pair must have same shape.
+It means they must be either scalars (per tensor) or 1-D tensors (per channel).)DOC")
+      .Input(
+          0,
+          "x",
+          "Input data tensor from previous layer; "
+          "has size (N x C x H x W), where N is the batch size, "
+          "C is the number of channels, and H and W are the "
+          "height and width. Note that this is for the 2D image. "
+          "Otherwise the size is (N x C x D1 x D2 ... x Dn). "
+          "Optionally, if dimension denotation is "
+          "in effect, the operation expects input data tensor "
+          "to arrive with the dimension denotation of [DATA_BATCH, "
+          "DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
+          "T1")
+      .Input(1, "x_scale", "Scale tensor for input 'x'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of input 'x'.", "T3")
+      .Input(2, "x_zero_point", "Zero point tensor for input 'x'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of input 'x'.", "T1")
+      .Input(
+          3,
+          "w",
+          "The weight tensor that will be used in the "
+          "convolutions; has size (M x C/group x kH x kW), where C "
+          "is the number of channels, and kH and kW are the "
+          "height and width of the kernel, and M is the number "
+          "of feature maps. For more than 2 dimensions, the "
+          "kernel shape will be (M x C/group x k1 x k2 x ... x kn), "
+          "where (k1 x k2 x ... kn) is the dimension of the kernel. "
+          "Optionally, if dimension denotation is in effect, "
+          "the operation expects the weight tensor to arrive "
+          "with the dimension denotation of [FILTER_OUT_CHANNEL, "
+          "FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. "
+          "X.shape[1] == (W.shape[1] * group) == C "
+          "(assuming zero based indices for the shape array). "
+          "Or in other words FILTER_IN_CHANNEL should be equal to DATA_CHANNEL. ",
+          "T1")
+      .Input(4, "w_scale", "Scale tensor for input 'w'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of input 'w'.", "T3")
+      .Input(5, "w_zero_point", "Zero point tensor for input 'w'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of input 'w'.", "T1")
+      .Input(6, "y_scale", "Scale tensor for output 'y'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of output 'y'.", "T3")
+      .Input(7, "y_zero_point", "Zero point tensor for output 'y'. It could be a scalar or a 1-D tensor, which means a per-tensor or per-channel quantization. If it's a 1-D tensor, its number of elements should be equal to the number of channels of output 'y'.", "T1")
+      .Input(8, "B", "Optional 1D bias to be added to the convolution, has size of M.", "T2", OpSchema::Optional)
+      .Output(
+          0,
+          "y",
+          "Output data tensor that contains the result of the "
+          "convolution. The output dimensions are functions "
+          "of the kernel size, stride size, and pad lengths.",
+          "T1")
+      .TypeConstraint(
+          "T1",
+          {"tensor(int8)", "tensor(uint8)"},
+          "Constrain input, filter, and output types to 8-bit integer tensors.")
+      .TypeConstraint("T2", {"tensor(int32)", "tensor(uint32)"}, "Constrain bias type to 32-bit integer tensor.")
+      .TypeConstraint("T3", {"tensor(float)"}, "Constrain scale of input, filter and output to float tensor.")
+      .Attr(
+          "auto_pad",
+          auto_pad_doc,
+          AttributeProto::STRING,
+          std::string("NOTSET"))
+      .Attr(
+          "kernel_shape",
+          "The shape of the convolution kernel. If not present, should be inferred from input 'w'.",
+          AttributeProto::INTS,
+          OPTIONAL)
+      .Attr(
+          "dilations",
+          "dilation value along each axis of the filter. If not present, the dilation defaults to 1 along each axis.",
+          AttributeProto::INTS,
+          OPTIONAL)
+      .Attr(
+          "strides", "Stride along each axis. If not present, the stride defaults to 1 along each axis.", AttributeProto::INTS, OPTIONAL)
+      .Attr("pads",
+            "Padding for the beginning and ending along each axis, it can take any value greater than or equal to 0. "
+            "The value represent the number of pixels added to the beginning and end part of the corresponding axis. "
+            "`pads` format should be as follow [x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of "
+            "pixels added at the beginning of axis `i` and xi_end, the number of pixels added at the end of axis `i`. "
+            "This attribute cannot be used simultaneously with auto_pad attribute. If not present, the padding defaults "
+            "to 0 along start and end of each axis.",
+            AttributeProto::INTS, OPTIONAL)
+      .Attr(
+          "group",
+          "number of groups input channels and output channels are divided into. default is 1.",
+          AttributeProto::INT,
+          static_cast<int64_t>(1));
+
+  // ConvInteger: convolution of 8-bit integer tensors; the output is the 32-bit integer type T3.
+  ONNX_CONTRIB_OPERATOR_SCHEMA(ConvInteger)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(R"DOC(
+The integer convolution operator consumes an input tensor, a filter, and a padding value,
+ and computes the output. The production MUST never overflow. The accumulation may overflow
+ if and only if in 32 bits.)DOC")
+      .Input(
+          0,
+          "x",
+          "Input data tensor from previous layer; "
+          "has size (N x C x H x W), where N is the batch size, "
+          "C is the number of channels, and H and W are the "
+          "height and width. Note that this is for the 2D image. "
+          "Otherwise the size is (N x C x D1 x D2 ... x Dn). "
+          "Optionally, if dimension denotation is "
+          "in effect, the operation expects input data tensor "
+          "to arrive with the dimension denotation of [DATA_BATCH, "
+          "DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
+          "T1")
+      .Input(
+          1,
+          "w",
+          "The weight tensor that will be used in the "
+          "convolutions; has size (M x C/group x kH x kW), where C "
+          "is the number of channels, and kH and kW are the "
+          "height and width of the kernel, and M is the number "
+          "of feature maps. For more than 2 dimensions, the "
+          "kernel shape will be (M x C/group x k1 x k2 x ... x kn), "
+          "where (k1 x k2 x ... kn) is the dimension of the kernel. "
+          "Optionally, if dimension denotation is in effect, "
+          "the operation expects the weight tensor to arrive "
+          "with the dimension denotation of [FILTER_OUT_CHANNEL, "
+          "FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. "
+          "X.shape[1] == (W.shape[1] * group) == C "
+          "(assuming zero based indices for the shape array). "
+          "Or in other words FILTER_IN_CHANNEL should be equal to DATA_CHANNEL. ",
+          "T2")
+      .Input(2, "z", "Padding value (zero_point normally), it's optional and default value is 0.", "T1", OpSchema::Optional)
+      .Output(
+          0,
+          "y",
+          "Output data tensor that contains the result of the "
+          "convolution. The output dimensions are functions "
+          "of the kernel size, stride size, and pad lengths.",
+          "T3")
+      .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "Constrain input X and Z data types as 8-bit integer tensors")
+      .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "Constrain input W data types as 8-bit integer tensor")
+      .TypeConstraint("T3",
+                      {"tensor(int32)", "tensor(uint32)"},
+                      "Constrain output Y data types as 32-bits integer tensors. "
+                      "T3 must be tensor(uint32) when both T1 and T2 are tensor(uint8), "
+                      "or must be tensor(int32) when either T1 or T2 is tensor(int8).")
+      .Attr(
+          "auto_pad",
+          auto_pad_doc,
+          AttributeProto::STRING,
+          std::string("NOTSET"))
+      .Attr(
+          "kernel_shape",
+          "The shape of the convolution kernel. If not present, should be inferred from input 'w'.",
+          AttributeProto::INTS,
+          OPTIONAL)
+      .Attr(
+          "dilations",
+          "dilation value along each axis of the filter. If not present, the dilation defaults to 1 along each axis.",
+          AttributeProto::INTS,
+          OPTIONAL)
+      .Attr(
+          "strides", "Stride along each axis. If not present, the stride defaults to 1 along each axis.", AttributeProto::INTS, OPTIONAL)
+      .Attr("pads",
+            "Padding for the beginning and ending along each axis, it can take any value greater than or equal to 0. "
+            "The value represent the number of pixels added to the beginning and end part of the corresponding axis. "
+            "`pads` format should be as follow [x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of "
+            "pixels added at the beginning of axis `i` and xi_end, the number of pixels added at the end of axis `i`. "
+            "This attribute cannot be used simultaneously with auto_pad attribute. If not present, the padding defaults "
+            "to 0 along start and end of each axis.",
+            AttributeProto::INTS, OPTIONAL)
+      .Attr(
+          "group",
+          "number of groups input channels and output channels are divided into. default is 1.",
+          AttributeProto::INT,
+          static_cast<int64_t>(1));
+
+  // MatMulInteger: matrix product of two 8-bit integer inputs, A at index 0 and B at index 1, with a 32-bit output.
+  ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulInteger)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(R"DOC(
+Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html.
+ The production MUST never overflow. The accumulation may overflow if and only if in 32 bits.)DOC")
+      .Input(0, "A", "N-dimensional matrix A", "T1")
+      .Input(1, "B", "N-dimensional matrix B", "T2")
+      .Output(0, "Y", "Matrix multiply results from A * B", "T3")
+      .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "Constrain input A data types as 8-bit integer tensor")
+      .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "Constrain input B data types as 8-bit integer tensor")
+      .TypeConstraint("T3",
+                      {"tensor(int32)", "tensor(uint32)"},
+                      "Constrain output Y data types as 32-bit integer tensor. "
+                      "T3 must be tensor(uint32) when both T1 and T2 are tensor(uint8), "
+                      "or must be tensor(int32) when either T1 or T2 is tensor(int8).");
+
+  // ReduceSumInteger: sums an 8-bit integer tensor along the given axes into a 32-bit integer tensor.
+  ONNX_CONTRIB_OPERATOR_SCHEMA(ReduceSumInteger)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(R"DOC(
+Computes the sum of the low-precision input tensor's element along the provided axes.
+The resulting tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0,
+then the resulting tensor have the reduced dimension pruned. The above behavior is similar to numpy,
+with the exception that numpy default keepdims to False instead of True.)DOC")
+      .Input(0, "data", "An input tensor.", "T1")
+      .Output(0, "reduced", "Reduced output tensor.", "T2")
+      .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "Constrain input type to 8-bit integer tensor.")
+      .TypeConstraint("T2",
+                      {"tensor(int32)", "tensor(uint32)"},
+                      "Constrain output data type to 32-bit integer tensor. "
+                      "T2 must be tensor(uint32) when T1 is tensor(uint8), "
+                      "or must be tensor(int32) when T1 is tensor(int8).")
+      // The descriptions below promise defaults, so `axes` is optional and `keepdims` defaults to 1.
+      .Attr(
+          "axes",
+          "A list of integers, along which to reduce. The default is to reduce over all the dimensions of the input tensor.",
+          AttributeProto::INTS, OPTIONAL)
+      .Attr(
+          "keepdims",
+          "Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
+          AttributeProto::INT, static_cast<int64_t>(1));
 }
 
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp);