mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-30 03:37:44 +00:00
QLinearConv (#370)
* First draft QLinearConv * Add shape inference for quantized conv operators * adding test cases for QLinearConv * plus minor corrections
This commit is contained in:
parent
5ef4c90f1d
commit
b92bc99861
6 changed files with 603 additions and 3 deletions
|
|
@ -28,6 +28,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMu
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv);
|
||||
|
||||
void RegisterContribKernels(KernelRegistry& kernel_registry) {
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp)>());
|
||||
|
|
@ -54,6 +55,7 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) {
|
|||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv)>());
|
||||
}
|
||||
|
||||
} // namespace contrib
|
||||
|
|
|
|||
|
|
@ -91,6 +91,134 @@ void matmulShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int input1Idx,
|
|||
*ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = resultShape;
|
||||
}
|
||||
|
||||
// Shape inference shared by the convolution-style contrib operators.
//
//   ctx                   inference context of the node being processed.
//   use_dilation          honour the "dilations" attribute; otherwise all-1s dilation is used.
//   require_kernel_shape  when true, "kernel_shape" must be given as an attribute and the first two
//                         output dims are copied from the data input. When false, a missing
//                         attribute is derived from the trailing dims of the weight input and the
//                         second output dim is the weight's first dim.
//   input1Idx             index of the data input (batch, channels, spatial dims...).
//   input2Idx             index of the weight input.
//
// Returns silently (leaving the output shape untouched) when a required input shape is unknown,
// when the legacy "auto_pad" attribute is present, when group != 1, or when a weight spatial dim
// has no concrete value. Malformed attributes are reported through fail_shape_inference.
void convPoolShapeInference(
    ONNX_NAMESPACE::InferenceContext& ctx,
    bool use_dilation,
    bool require_kernel_shape,
    int input1Idx, int input2Idx) {
  if (!hasInputShape(ctx, input1Idx)) {
    return;
  }

  // if kernel shape is an input (and not attribute)
  // we need the shape of the second input.
  // hasNInputShapes(ctx, n) only looks at inputs [0, n), which would skip the
  // weight input itself, so the weight index is checked directly.
  if (!require_kernel_shape && !hasInputShape(ctx, input2Idx)) {
    return;
  }

  // don't bother with legacy auto_pad for now
  if (ctx.getAttribute("auto_pad")) {
    return;
  }

  auto input_shape = ctx.getInputType(input1Idx)->tensor_type().shape();
  if (input_shape.dim_size() < 2) {
    fail_shape_inference("Input tensor must have atleast 2 dimensions");
  }

  // first dim is the batch axis and the next is the number of channels.
  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);

  // Pooling operations don't support dilation, only Conv. For
  // simplicity of the code, we just treat them as having all-1s
  // dilation.
  std::vector<int64_t> dilations;
  if (use_dilation && getRepeatedAttribute(ctx, "dilations", dilations)) {
    if (dilations.size() != n_input_dims) {
      fail_shape_inference("Attribute dilations has incorrect size");
    }
  } else {
    dilations.assign(n_input_dims, 1);
  }

  int64_t groups = getAttribute(ctx, "group", 1);
  if (groups != 1) {
    return;  // we don't handle the group case.
  }

  std::vector<int64_t> pads;
  if (getRepeatedAttribute(ctx, "pads", pads)) {
    if (pads.size() != n_input_dims * 2) {
      fail_shape_inference("Attribute pads has incorrect size");
    }
  } else {
    pads.assign(n_input_dims * 2, 0);
  }

  std::vector<int64_t> strides;
  if (getRepeatedAttribute(ctx, "strides", strides)) {
    if (strides.size() != n_input_dims) {
      fail_shape_inference("Attribute strides has incorrect size");
    }
  } else {
    strides.assign(n_input_dims, 1);
  }

  std::vector<int64_t> kernel_shape;
  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Attribute kernel_shape has incorrect size");
    }
  } else if (require_kernel_shape) {
    fail_shape_inference("Attribute kernel_shape must be specified");
  } else {
    // Derive the kernel shape from the weight tensor: everything after its
    // first two dims is spatial.
    auto second_input_shape = ctx.getInputType(input2Idx)->tensor_type().shape();
    for (int i = 2; i < second_input_shape.dim_size(); ++i) {
      if (!second_input_shape.dim(i).has_dim_value()) {
        return;
      }
      kernel_shape.push_back(second_input_shape.dim(i).dim_value());
    }
    // The loop below indexes input_shape, pads, strides and dilations by
    // kernel dimension, so the derived rank has to match the data rank.
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Second input tensor has wrong dimension");
    }
  }

  auto output_shape =
      ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();

  if (require_kernel_shape) {
    // add the first two dimensions from the input.
    *output_shape->add_dim() = input_shape.dim(0);
    *output_shape->add_dim() = input_shape.dim(1);
  } else {
    // batch comes from the data input, output channels from the weight input.
    *output_shape->add_dim() = input_shape.dim(0);
    auto& second_input_shape = getInputShape(ctx, input2Idx);
    if (second_input_shape.dim_size() < 1) {
      fail_shape_inference("Second input tensor has wrong dimension");
    }
    *output_shape->add_dim() = second_input_shape.dim(0);
  }

  int kernel_shape_size = static_cast<int>(kernel_shape.size());
  for (int i = 0; i < kernel_shape_size; ++i) {
    // A dim is always appended; it stays valueless when the input extent is unknown.
    auto newdim = output_shape->add_dim();
    if (!input_shape.dim(2 + i).has_dim_value()) {
      continue;
    }
    // how big is the input, including padding
    int64_t effective_input_size = input_shape.dim(2 + i).dim_value();
    effective_input_size += pads[i];
    effective_input_size += pads[i + kernel_shape_size];

    int64_t effective_kernel_size = kernel_shape[i];
    // accounting for dilation, how big is the kernel in this dimension
    effective_kernel_size = (effective_kernel_size - 1) * dilations[i] + 1;

    // how many times we can move the kernel from its initial position, based
    // on the stride
    int64_t strided_kernel_positions =
        (effective_input_size - effective_kernel_size) / strides[i];

    // add in the initial position
    newdim->set_dim_value(1 + strided_kernel_positions);
  }

  if (ctx.getNumOutputs() > 1) {
    // MaxPool with two outputs case.
    auto second_output_shape =
        ctx.getOutputType(1)->mutable_tensor_type()->mutable_shape();
    second_output_shape->CopyFrom(*output_shape);
  }
}
|
||||
|
||||
void RegisterContribSchemas() {
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(SampleOp)
|
||||
.SetDomain(kMSDomain)
|
||||
|
|
@ -567,7 +695,31 @@ if the input is 8 bits or in 64 bits if the input is 16 bits.)DOC")
|
|||
"group",
|
||||
"number of groups input channels and output channels are divided into. default is 1.",
|
||||
AttributeProto::INT,
|
||||
static_cast<int64_t>(1));
|
||||
static_cast<int64_t>(1))
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
auto x_type = ctx.getInputType(0);
|
||||
auto w_type = ctx.getInputType(3);
|
||||
auto y_type = ctx.getOutputType(0);
|
||||
if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
|
||||
x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
|
||||
w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
|
||||
fail_type_inference(
|
||||
"inputs are expected to have tensor type and output type should not be null.");
|
||||
}
|
||||
|
||||
if (ONNX_NAMESPACE::TensorProto::UINT8 == x_type->tensor_type().elem_type() &&
|
||||
ONNX_NAMESPACE::TensorProto::UINT8 == w_type->tensor_type().elem_type()) {
|
||||
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::UINT8);
|
||||
} else {
|
||||
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT8);
|
||||
}
|
||||
|
||||
convPoolShapeInference(ctx, true, false, 0, 3);
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(ConvInteger)
|
||||
.SetDomain(kMSDomain)
|
||||
|
|
@ -660,7 +812,23 @@ The integer convolution operator consumes an input tensor, a filter, and a paddi
|
|||
"group",
|
||||
"number of groups input channels and output channels are divided into. default is 1.",
|
||||
AttributeProto::INT,
|
||||
static_cast<int64_t>(1));
|
||||
static_cast<int64_t>(1))
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
auto x_type = ctx.getInputType(0);
|
||||
auto w_type = ctx.getInputType(1);
|
||||
auto y_type = ctx.getOutputType(0);
|
||||
if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
|
||||
x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
|
||||
w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
|
||||
fail_type_inference(
|
||||
"inputs are expected to have tensor type and output type should not be null.");
|
||||
}
|
||||
|
||||
// Right now we only support int32
|
||||
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT32);
|
||||
|
||||
convPoolShapeInference(ctx, true, false, 0, 1);
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulInteger)
|
||||
.SetDomain(kMSDomain)
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
|
|||
gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(
|
||||
col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
|
||||
gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(
|
||||
Ydata, static_cast<int>(M / group_), static_cast<int>(output_image_size));
|
||||
Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));
|
||||
const std::tuple<> empty_pipeline = {};
|
||||
|
||||
gemmlowp::GemmContext gemm_context;
|
||||
|
|
|
|||
193
onnxruntime/core/providers/cpu/nn/qlinearconv.cc
Normal file
193
onnxruntime/core/providers/cpu/nn/qlinearconv.cc
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4244)
|
||||
#pragma warning(disable : 4267)
|
||||
#endif
|
||||
|
||||
#include "core/providers/cpu/nn/qlinearconv.h"
|
||||
#include "core/util/math.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
||||
// Runs the quantized convolution for one node invocation.
//
// Inputs are read by position: 0 = X (uint8), 1/2 = X scale / zero point,
// 3 = W (uint8), 4/5 = W scale / zero point, 6/7 = Y scale / zero point and,
// only when the node declares nine inputs, 8 = bias (read as int32).
// For every image and group the input patch matrix is built with Im2colNd and
// multiplied with the filter matrix through gemmlowp; the output pipeline
// requantizes the int32 accumulators into the uint8 output tensor.
Status QLinearConv::Compute(OpKernelContext* context) const {
  const Tensor* X = context->Input<Tensor>(0);
  const Tensor* W = context->Input<Tensor>(3);

  // validate scale and zero points
  auto input_scale = context->Input<Tensor>(1);
  auto input_offset = context->Input<Tensor>(2);
  ScaleAndZeropointPairValidationHelper(input_scale, input_offset);
  auto filter_scale = context->Input<Tensor>(4);
  auto filter_offset = context->Input<Tensor>(5);
  ScaleAndZeropointPairValidationHelper(filter_scale, filter_offset);
  auto result_scale = context->Input<Tensor>(6);
  auto result_offset = context->Input<Tensor>(7);
  ScaleAndZeropointPairValidationHelper(result_scale, result_offset);

  auto input_scale_data = *(input_scale->template Data<float>());
  auto filter_scale_data = *(filter_scale->template Data<float>());
  auto result_scale_data = *(result_scale->template Data<float>());

  auto input_offset_data = *(input_offset->template Data<uint8_t>());
  auto filter_offset_data = *(filter_offset->template Data<uint8_t>());
  auto result_offset_data = *(result_offset->template Data<uint8_t>());

  // Combined rescale factor applied to the int32 accumulators, expressed as a
  // fixed-point multiplier plus a right shift for the gemmlowp output stage.
  const float real_multiplier = (input_scale_data * filter_scale_data) / result_scale_data;
  int32_t integer_multiplier;
  int right_shift;
  QuantizeMultiplier(real_multiplier, &integer_multiplier, &right_shift);

  size_t num_inputs = OpKernel::Node().InputDefs().size();
  const Tensor* bias = nullptr;
  if (num_inputs == 9) {
    bias = context->Input<Tensor>(8);
  }

  // Validate before indexing into the shapes below, so that a malformed X/W
  // is reported as an error status instead of being read out of range.
  // NOTE(review): assumes ConvBase::ValidateInputShape rejects tensors of too small a rank - confirm.
  ORT_RETURN_IF_ERROR(ValidateInputShape(X, W));

  const int64_t N = X->Shape()[0];
  const int64_t C = X->Shape()[1];
  const int64_t M = W->Shape()[0];

  std::vector<int64_t> kernel_shape;
  ORT_RETURN_IF_ERROR(ComputeKernelShape(W->Shape(), kernel_shape));

  // Attributes left unset by the model fall back to zero padding, unit dilation and unit stride.
  std::vector<int64_t> pads(pads_);
  if (pads.empty()) {
    pads.resize(kernel_shape.size() * 2, 0);
  }
  std::vector<int64_t> dilations(dilations_);
  if (dilations.empty()) {
    dilations.resize(kernel_shape.size(), 1);
  }
  std::vector<int64_t> strides(strides_);
  if (strides.empty()) {
    strides.resize(kernel_shape.size(), 1);
  }

  std::vector<int64_t> Y_dims;
  Y_dims.insert(Y_dims.begin(), {N, M});
  TensorShape input_shape = X->Shape().Slice(2);
  ORT_RETURN_IF_ERROR(InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
  Tensor* Y = context->Output(0, TensorShape(Y_dims));
  TensorShape output_shape = Y->Shape().Slice(2);

  AllocatorPtr alloc;
  ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));

  const uint8_t* Xdata = X->template Data<uint8_t>();
  uint8_t* Ydata = Y->template MutableData<uint8_t>();

  const int64_t input_image_size = input_shape.Size();
  const int64_t output_image_size = output_shape.Size();
  const int64_t kernel_size = TensorShape(kernel_shape).Size();
  const int64_t X_offset = C / group_ * input_image_size;
  // Elements written per (image, group). Y holds N * M * output_image_size
  // elements, so this equals Size() / N / group_ without dividing by the
  // batch dimension, which may be zero.
  const int64_t Y_offset = M * output_image_size / group_;
  const int64_t W_offset = W->Shape().Size() / group_;
  const int64_t kernel_dim = C / group_ * kernel_size;
  const int64_t col_buffer_size = kernel_dim * output_image_size;
  const int bias_offset = static_cast<int>(M / group_);

  // Scratch buffer for the im2col matrix; released by BufferDeleter on return.
  auto col_data = alloc->Alloc(sizeof(uint8_t) * col_buffer_size);
  BufferUniquePtr col_buffer(col_data, BufferDeleter(alloc));
  uint8_t* col_buffer_data = static_cast<uint8_t*>(col_buffer.get());

  TensorShape image_shape = X->Shape().Slice(1);
  std::vector<int64_t> col_buffer_shape{kernel_dim};
  col_buffer_shape.insert(col_buffer_shape.end(), output_shape.GetDims().begin(),
                          output_shape.GetDims().end());

  constexpr gemmlowp::MapOrder MatOrder = gemmlowp::MapOrder::RowMajor;

  for (int image_id = 0; image_id < N; ++image_id) {
    for (int group_id = 0; group_id < group_; ++group_id) {
      // The input zero point is passed as the trailing argument.
      // NOTE(review): presumably the value written for padded positions - confirm against Im2colNd.
      math::Im2colNd<uint8_t, CPUMathUtil, StorageOrder::NCHW>()(
          Xdata + group_id * X_offset,
          image_shape.GetDims().data(),
          col_buffer_shape.data(),
          C * input_image_size,
          col_buffer_size,
          kernel_shape.data(),
          strides.data(),
          dilations.data(),
          pads.data(),
          static_cast<int>(kernel_shape.size()),
          col_buffer_data,
          &CPUMathUtil::Instance(),
          false,
          input_offset_data);

      const uint8_t* filter_data_as_uint8 = W->template Data<uint8_t>() + group_id * W_offset;
      gemmlowp::MatrixMap<const std::uint8_t, MatOrder> lhs(
          filter_data_as_uint8, static_cast<int>(M / group_), static_cast<int>(kernel_dim));
      gemmlowp::MatrixMap<const std::uint8_t, MatOrder> rhs(
          col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
      gemmlowp::MatrixMap<std::uint8_t, MatOrder> result(
          Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));

      // TODO: worker thread pool needs to be handled.
      gemmlowp::GemmContext gemm_context;
      // The negated zero points are handed to gemmlowp as the lhs/rhs offsets.
      if (bias == nullptr) {
        auto output_pipeline = MakeOutputPipelineWithOutBias(result_offset_data,
                                                             integer_multiplier, right_shift);
        gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
                                         gemmlowp::DefaultL8R8BitDepthParams>(
            &gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
            output_pipeline);
      } else {
        auto output_pipeline = MakeOutputPipelineWithBias(bias->template Data<int32_t>() + group_id * bias_offset,
                                                          static_cast<int>(M / group_), result_offset_data, integer_multiplier, right_shift);
        gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
                                         gemmlowp::DefaultL8R8BitDepthParams>(
            &gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
            output_pipeline);
      }
    }

    // Advance to the next image in the batch.
    Xdata += X_offset * group_;
    Ydata += Y_offset * group_;
  }

  return Status::OK();
}
|
||||
|
||||
// Splits a positive, finite floating-point multiplier into a Q31 fixed-point
// mantissa and a right shift such that
//   fp_multiplier == (*integer_multiplier / 2^31) * 2^(-*right_shift).
//
// std::frexp performs the decomposition (mantissa in [0.5, 1)), which avoids
// reading the float through a uint32_t pointer (undefined behaviour under the
// strict-aliasing rules). For normal positive inputs the results match the
// previous bit-level implementation; a zero multiplier now yields (0, 0).
// Multipliers >= 1 produce a negative shift, exactly as before.
void QLinearConv::QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const {
  int exponent = 0;
  // bring multiplier in [.5,1) range and calculate the shift
  const float mantissa = std::frexp(fp_multiplier, &exponent);

  // convert to fixed point number
  const std::int64_t int_multiplier =
      static_cast<std::int64_t>(std::round(mantissa * (1ll << 31)));

  *integer_multiplier = static_cast<int32_t>(int_multiplier);
  *right_shift = -exponent;
}
|
||||
|
||||
// Enforces that a scale / zero-point pair is per-tensor: each must be either a
// rank-0 tensor or a rank-1 tensor holding exactly one element. Compute only
// reads the first element of each, so longer vectors are rejected here.
// (The previous rank-1 test compared the number of dims with 1 twice and
// therefore accepted 1-D tensors of any length.)
void QLinearConv::ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const {
  const auto& scale_shape = scale->Shape();
  ORT_ENFORCE(scale_shape.NumDimensions() == 0 ||
                  (scale_shape.NumDimensions() == 1 && scale_shape[0] == 1),
              "scale must be a scalar");

  const auto& zeropoint_shape = zeropoint->Shape();
  ORT_ENFORCE(zeropoint_shape.NumDimensions() == 0 ||
                  (zeropoint_shape.NumDimensions() == 1 && zeropoint_shape[0] == 1),
              "zeropoint must be a scalar");
}
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
QLinearConv,
|
||||
kMSDomain,
|
||||
1,
|
||||
kCpuExecutionProvider,
|
||||
KernelDefBuilder()
|
||||
.TypeConstraint("T1", DataTypeImpl::GetTensorType<uint8_t>())
|
||||
.TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>())
|
||||
.TypeConstraint("T3", DataTypeImpl::GetTensorType<uint8_t>()),
|
||||
QLinearConv);
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
57
onnxruntime/core/providers/cpu/nn/qlinearconv.h
Normal file
57
onnxruntime/core/providers/cpu/nn/qlinearconv.h
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/providers/cpu/nn/conv_base.h"
|
||||
#include "core/util/gemmlowp_common_wrapper.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
class QLinearConv : public OpKernel, public ConvBase {
|
||||
public:
|
||||
explicit QLinearConv(const OpKernelInfo& info) : OpKernel(info), ConvBase(info) {
|
||||
}
|
||||
|
||||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
void QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const;
|
||||
|
||||
void ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const;
|
||||
};
|
||||
|
||||
typedef gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Col> ColVectorMap;
|
||||
|
||||
inline std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
|
||||
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
|
||||
gemmlowp::OutputStageSaturatingCastToUint8>
|
||||
MakeOutputPipelineWithBias(const int32_t* bias,
|
||||
int rows,
|
||||
std::int32_t result_offset,
|
||||
std::int32_t result_mult_int,
|
||||
std::int32_t result_shift) {
|
||||
ColVectorMap bias_vector(bias, rows);
|
||||
gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
|
||||
bias_addition_stage.bias_vector = bias_vector;
|
||||
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage;
|
||||
quantize_down_stage.result_offset_after_shift = result_offset;
|
||||
quantize_down_stage.result_fixedpoint_multiplier = result_mult_int;
|
||||
quantize_down_stage.result_shift = result_shift;
|
||||
gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
|
||||
return std::make_tuple(bias_addition_stage, quantize_down_stage, saturating_cast_stage);
|
||||
}
|
||||
|
||||
inline std::tuple<gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
|
||||
gemmlowp::OutputStageSaturatingCastToUint8>
|
||||
MakeOutputPipelineWithOutBias(std::int32_t result_offset,
|
||||
std::int32_t result_mult_int,
|
||||
std::int32_t result_shift) {
|
||||
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage;
|
||||
quantize_down_stage.result_offset_after_shift = result_offset;
|
||||
quantize_down_stage.result_fixedpoint_multiplier = result_mult_int;
|
||||
quantize_down_stage.result_shift = result_shift;
|
||||
gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
|
||||
return std::make_tuple(quantize_down_stage, saturating_cast_stage);
|
||||
}
|
||||
}
|
||||
} // namespace onnxruntime
|
||||
180
onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
Normal file
180
onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
using namespace std;
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
namespace {
|
||||
|
||||
// Writes the smallest and largest element of `vec` to *min and *max.
// For an empty vector both outputs are set to 0 instead of dereferencing
// the end iterator.
void FindMinMax(const std::vector<float>& vec, float* min,
                float* max) {
  *min = 0.f;
  *max = 0.f;
  if (vec.empty()) {
    return;
  }
  // Single pass over the data for both extremes.
  const auto extremes = std::minmax_element(vec.begin(), vec.end());
  *min = *extremes.first;
  *max = *extremes.second;
}
|
||||
|
||||
// Computes linear quantization parameters for the uint8 range 0-255.
// The real range [min, max] is first widened to contain 0 so that zero is
// exactly representable. *scale receives the real-value step per quantized
// unit and *zero_point the quantized value that represents real 0, clamped to
// [0, 255]. A degenerate range (min == max == 0 after widening) yields
// scale 1 and zero point 0 instead of dividing by a zero scale.
void FindScaleAndZeroPoint(float min, float max, float* scale, uint8_t* zero_point) {
  min = std::min(min, 0.f);
  max = std::max(max, 0.f);
  const float qmin = 0.f;
  const float qmax = 255.f;

  if (max == min) {
    // All-zero data: any positive scale works, pick the identity.
    *scale = 1.f;
    *zero_point = 0;
    return;
  }

  *scale = (max - min) / (qmax - qmin);
  const float initial_zero_point = qmin - min / *scale;
  *zero_point = static_cast<uint8_t>(std::round(std::max(qmin, std::min(qmax, initial_zero_point))));
}
|
||||
|
||||
// Quantizes every element of `input` to uint8 as
//   clamp(round(value / scale) + zero_point, 0, 255)
// and stores the results in *input_quantized. The output is resized to
// input.size(), so callers no longer have to pre-size it.
void Quantize(float scale, uint8_t zero_point,
              const std::vector<float>& input, std::vector<uint8_t>* input_quantized) {
  input_quantized->resize(input.size());
  std::transform(input.begin(), input.end(), input_quantized->begin(),
                 [scale, zero_point](float value) {
                   const float shifted = std::round(value / scale) + zero_point;
                   const float clamped_val = std::max(0.f, std::min(255.f, shifted));
                   return static_cast<uint8_t>(clamped_val);
                 });
}
|
||||
|
||||
// 2-D QLinearConv with a 1x1 kernel and default attributes: float reference
// data is quantized to uint8 with per-tensor parameters derived from each
// tensor's own value range, and the operator output is compared against the
// quantized float reference result.
TEST(ConvTest, QLinearConv2DTest) {
  OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);

  const std::vector<int64_t> x_dims = {1, 1, 7, 7};
  const std::vector<float> x_real = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f, -0.39421093463897705f,
                                     0.2626858949661255f, 0.13414543867111206f, -0.27184486389160156f, -0.43028733134269714f,
                                     -0.26825493574142456f, 0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f,
                                     -0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f, 0.4043201804161072f,
                                     0.07795023918151855f, 0.326981782913208f, 0.13114392757415771f, -0.4416425824165344f,
                                     0.12446999549865723f, 0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f,
                                     0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f, 0.3877097964286804f,
                                     0.20812749862670898f, -0.34297940135002136f, -0.029246658086776733f, -0.20483523607254028f,
                                     -0.19244328141212463f, -0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f,
                                     0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f, -0.3349453806877136f,
                                     -0.34091079235076904f, 0.006497859954833984f, 0.4537564516067505f, 0.08006560802459717f,
                                     -0.14788749814033508f, 0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f,
                                     0.42619407176971436f};

  const std::vector<int64_t> w_dims = {1, 1, 1, 1};
  const std::vector<float> w_real = {-0.4406261742115021f};

  const std::vector<int64_t> y_dims = {1, 1, 7, 7};
  const std::vector<float> y_real = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f, 0.17369966208934784f,
                                     -0.11574628204107285f, -0.05910799279808998f, 0.1197819635272026f, 0.18959586322307587f,
                                     0.1182001456618309f, -0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f,
                                     0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f, -0.17815405130386353f,
                                     -0.034346915781497955f, -0.14407673478126526f, -0.05778544768691063f, 0.19459928572177887f,
                                     -0.05484473705291748f, -0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f,
                                     -0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f, -0.17083507776260376f,
                                     -0.09170642495155334f, 0.1511256992816925f, 0.012886842712759972f, 0.09025576710700989f,
                                     0.08479554951190948f, 0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f,
                                     -0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f, 0.1475857049226761f,
                                     0.15021422505378723f, -0.0028631272725760937f, -0.19993697106838226f, -0.03527900204062462f,
                                     0.06516310572624207f, -0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f,
                                     -0.18779225647449493f};

  // Quantization parameters plus quantized payload of one tensor.
  struct QuantizedTensor {
    float scale;
    uint8_t zero_point;
    std::vector<uint8_t> data;
  };

  // Derives per-tensor parameters from the value range and quantizes the data.
  const auto quantize_tensor = [](const std::vector<float>& real_values) -> QuantizedTensor {
    float range_min, range_max;
    FindMinMax(real_values, &range_min, &range_max);

    QuantizedTensor q;
    FindScaleAndZeroPoint(range_min, range_max, &q.scale, &q.zero_point);
    q.data.resize(real_values.size());
    Quantize(q.scale, q.zero_point, real_values, &q.data);
    return q;
  };

  const QuantizedTensor x_q = quantize_tensor(x_real);
  const QuantizedTensor w_q = quantize_tensor(w_real);
  const QuantizedTensor y_q = quantize_tensor(y_real);

  test.AddInput<uint8_t>("x", x_dims, x_q.data);
  test.AddInput<float>("x_scale", {}, {x_q.scale});
  test.AddInput<uint8_t>("x_zero_point", {}, {x_q.zero_point});

  test.AddInput<uint8_t>("w", w_dims, w_q.data);
  test.AddInput<float>("w_scale", {}, {w_q.scale});
  test.AddInput<uint8_t>("w_zero_point", {}, {w_q.zero_point});

  test.AddInput<float>("y_scale", {}, {y_q.scale});
  test.AddInput<uint8_t>("y_zero_point", {}, {y_q.zero_point});

  test.AddOutput<uint8_t>("y", y_dims, y_q.data);

  test.Run();
}
|
||||
|
||||
// 3-D QLinearConv with a 1x1x1 kernel, padding 2 on every side and stride 2:
// float reference data is quantized to uint8 with per-tensor parameters
// derived from each tensor's own value range, and the operator output is
// compared against the quantized float reference result.
TEST(ConvTest, QLinearConv3DTest) {
  OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);

  const std::vector<int64_t> x_dims = {1, 1, 4, 4, 4};
  const std::vector<float> x_real = {0.010772407054901123f, -0.43806642293930054f, 0.455391526222229f, -0.28657248616218567f,
                                     0.45676887035369873f, -0.0320507287979126f, 0.4229400157928467f, -0.18730869889259338f,
                                     -0.45851585268974304f, 0.042054951190948486f, -0.13332295417785645f, -0.25374430418014526f,
                                     -0.23845627903938293f, 0.12214112281799316f, -0.1778157651424408f, 0.1891845464706421f,
                                     0.37962496280670166f, -0.033982306718826294f, 0.12737131118774414f, -0.040284961462020874f,
                                     0.46427029371261597f, -0.22687292098999023f, 0.17398333549499512f, -0.3014046251773834f,
                                     -0.4043419063091278f, -0.33206477761268616f, 0.04655301570892334f, -0.4947906732559204f,
                                     0.0755157470703125f, 0.1173025369644165f, 0.47043120861053467f, 0.4824737310409546f,
                                     -0.37734976410865784f, -0.056491583585739136f, -0.10790631175041199f, 0.043476223945617676f,
                                     0.24469023942947388f, -0.4100031852722168f, 0.0616222620010376f, 0.2296960949897766f,
                                     0.27883386611938477f, 0.08150351047515869f, 0.2453773021697998f, 0.08250969648361206f,
                                     -0.1471814215183258f, -0.43011274933815f, 0.027180075645446777f, 0.3605625033378601f,
                                     0.24954384565353394f, -0.22505927085876465f, -0.36272895336151123f, -0.47674262523651123f,
                                     0.11275297403335571f, 0.49773406982421875f, 0.2686365246772766f, 0.025525271892547607f,
                                     -0.3037869930267334f, 0.41126757860183716f, 0.36149072647094727f, 0.00883406400680542f,
                                     -0.07959523797035217f, 0.3601323366165161f, 0.17322391271591187f, -0.012007325887680054f};

  const std::vector<int64_t> w_dims = {1, 1, 1, 1, 1};
  const std::vector<float> w_real = {0.32824617624282837f};

  const std::vector<int64_t> y_dims = {1, 1, 4, 4, 4};
  const std::vector<float> y_real = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0035360013134777546f, 0.14948052167892456f, 0.0f,
                                     0.0f, -0.15050607919692993f, -0.043762750923633575f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -0.12386361509561539f, -0.03541983291506767f, 0.0f,
                                     0.0f, 0.09152615070343018f, 0.08054415881633759f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};

  std::vector<int64_t> pads = {2, 2, 2, 2, 2, 2};
  std::vector<int64_t> strides = {2, 2, 2};

  // Quantization parameters plus quantized payload of one tensor.
  struct QuantizedTensor {
    float scale;
    uint8_t zero_point;
    std::vector<uint8_t> data;
  };

  // Derives per-tensor parameters from the value range and quantizes the data.
  const auto quantize_tensor = [](const std::vector<float>& real_values) -> QuantizedTensor {
    float range_min, range_max;
    FindMinMax(real_values, &range_min, &range_max);

    QuantizedTensor q;
    FindScaleAndZeroPoint(range_min, range_max, &q.scale, &q.zero_point);
    q.data.resize(real_values.size());
    Quantize(q.scale, q.zero_point, real_values, &q.data);
    return q;
  };

  const QuantizedTensor x_q = quantize_tensor(x_real);
  const QuantizedTensor w_q = quantize_tensor(w_real);
  const QuantizedTensor y_q = quantize_tensor(y_real);

  test.AddAttribute("pads", pads);
  test.AddAttribute("strides", strides);

  test.AddInput<uint8_t>("x", x_dims, x_q.data);
  test.AddInput<float>("x_scale", {}, {x_q.scale});
  test.AddInput<uint8_t>("x_zero_point", {}, {x_q.zero_point});

  test.AddInput<uint8_t>("w", w_dims, w_q.data);
  test.AddInput<float>("w_scale", {}, {w_q.scale});
  test.AddInput<uint8_t>("w_zero_point", {}, {w_q.zero_point});

  test.AddInput<float>("y_scale", {}, {y_q.scale});
  test.AddInput<uint8_t>("y_zero_point", {}, {y_q.zero_point});

  test.AddOutput<uint8_t>("y", y_dims, y_q.data);

  test.Run();
}
|
||||
} // namespace
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
Loading…
Reference in a new issue