QLinearConv (#370)

* First draft QLinearConv

* Add shape inference for quantized conv operators

* adding test cases for QLinearConv

* plus minor corrections
This commit is contained in:
Ashwini Khade 2019-01-28 23:13:47 -08:00 committed by GitHub
parent 5ef4c90f1d
commit b92bc99861
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 603 additions and 3 deletions

View file

@ -28,6 +28,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMu
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv);
void RegisterContribKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp)>());
@ -54,6 +55,7 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv)>());
}
} // namespace contrib

View file

@ -91,6 +91,134 @@ void matmulShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int input1Idx,
*ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = resultShape;
}
// Infers the output shape of a convolution / pooling style operator.
//
//   ctx                  - inference context of the node being processed.
//   use_dilation         - when true the "dilations" attribute is honoured,
//                          otherwise every dilation is treated as 1.
//   require_kernel_shape - when true the "kernel_shape" attribute is mandatory;
//                          when false a missing attribute is replaced by the
//                          trailing dimensions (index 2 and up) of the input
//                          at input2Idx.
//   input1Idx            - index of the input whose shape drives the output.
//   input2Idx            - index of the input that supplies the kernel shape
//                          and the second output dimension when
//                          require_kernel_shape is false.
//
// The function returns without populating any output shape when the required
// input shapes are unknown, when "auto_pad" is present, when group != 1, or
// when a needed kernel dimension is symbolic. Inconsistent attributes or ranks
// are reported through fail_shape_inference.
void convPoolShapeInference(
    ONNX_NAMESPACE::InferenceContext& ctx,
    bool use_dilation,
    bool require_kernel_shape,
    int input1Idx, int input2Idx) {
  // we need the shape of the first input for this inference.
  if (!hasInputShape(ctx, input1Idx)) {
    return;
  }
  // if kernel shape is an input (and not attribute)
  // we need the shape of the second input. Check that exact input:
  // hasNInputShapes(ctx, n) only looks at inputs [0, n) and would never
  // examine input2Idx itself.
  if (!require_kernel_shape && !hasInputShape(ctx, input2Idx)) {
    return;
  }
  // don't bother with legacy auto_pad for now
  if (ctx.getAttribute("auto_pad")) {
    return;
  }
  const auto& input_shape = ctx.getInputType(input1Idx)->tensor_type().shape();
  if (input_shape.dim_size() < 2) {
    fail_shape_inference("Input tensor must have atleast 2 dimensions");
  }
  // first dim is the batch axis and the next is the number of channels.
  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
  // Pooling operations don't support dilation, only Conv. For
  // simplicity of the code, we just treat them as having all-1s
  // dilation.
  std::vector<int64_t> dilations;
  if (use_dilation && getRepeatedAttribute(ctx, "dilations", dilations)) {
    if (dilations.size() != n_input_dims) {
      fail_shape_inference("Attribute dilations has incorrect size");
    }
  } else {
    dilations.assign(n_input_dims, 1);
  }
  int64_t groups = getAttribute(ctx, "group", 1);
  if (groups != 1) {
    return;  // we don't handle the group case.
  }
  std::vector<int64_t> pads;
  if (getRepeatedAttribute(ctx, "pads", pads)) {
    if (pads.size() != n_input_dims * 2) {
      fail_shape_inference("Attribute pads has incorrect size");
    }
  } else {
    pads.assign(n_input_dims * 2, 0);
  }
  std::vector<int64_t> strides;
  if (getRepeatedAttribute(ctx, "strides", strides)) {
    if (strides.size() != n_input_dims) {
      fail_shape_inference("Attribute strides has incorrect size");
    }
  } else {
    strides.assign(n_input_dims, 1);
  }
  std::vector<int64_t> kernel_shape;
  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Attribute kernel_shape has incorrect size");
    }
  } else if (require_kernel_shape) {
    fail_shape_inference("Attribute kernel_shape must be specified");
  } else {
    const auto& second_input_shape = getInputShape(ctx, input2Idx);
    for (int i = 2; i < second_input_shape.dim_size(); ++i) {
      if (!second_input_shape.dim(i).has_dim_value()) {
        return;
      }
      kernel_shape.push_back(second_input_shape.dim(i).dim_value());
    }
    // The loop below indexes input_shape, pads, strides and dilations by the
    // kernel rank, so a kernel whose spatial rank differs from the input's
    // would read out of range.
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Second input tensor has wrong dimension");
    }
  }
  auto output_shape =
      ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
  // the first output dimension always comes from the first input.
  *output_shape->add_dim() = input_shape.dim(0);
  if (require_kernel_shape) {
    // the second dimension is taken from the first input as well.
    *output_shape->add_dim() = input_shape.dim(1);
  } else {
    // the second dimension is the leading dimension of the kernel input.
    // Use input2Idx rather than a fixed index: for operators whose kernel is
    // not input 1 a fixed index would read an unrelated tensor.
    const auto& second_input_shape = getInputShape(ctx, input2Idx);
    if (second_input_shape.dim_size() < 1) {
      fail_shape_inference("Second input tensor has wrong dimension");
    }
    *output_shape->add_dim() = second_input_shape.dim(0);
  }
  int kernel_shape_size = static_cast<int>(kernel_shape.size());
  for (int i = 0; i < kernel_shape_size; ++i) {
    // a dimension is always appended; it stays unset when the matching input
    // dimension is symbolic.
    auto newdim = output_shape->add_dim();
    if (!input_shape.dim(2 + i).has_dim_value()) {
      continue;
    }
    // how big is the input, including padding
    int64_t effective_input_size = input_shape.dim(2 + i).dim_value();
    effective_input_size += pads[i];
    effective_input_size += pads[i + kernel_shape_size];
    int64_t effective_kernel_size = kernel_shape[i];
    // accounting for dilation, how big is the kernel in this dimension
    effective_kernel_size = (effective_kernel_size - 1) * dilations[i] + 1;
    // how many times we can move the kernel from its initial position, based
    // on the stride
    int64_t strided_kernel_positions =
        (effective_input_size - effective_kernel_size) / strides[i];
    // add in the initial position
    newdim->set_dim_value(1 + strided_kernel_positions);
  }
  if (ctx.getNumOutputs() > 1) {
    // MaxPool with two outputs case.
    auto second_output_shape =
        ctx.getOutputType(1)->mutable_tensor_type()->mutable_shape();
    second_output_shape->CopyFrom(*output_shape);
  }
}
void RegisterContribSchemas() {
ONNX_CONTRIB_OPERATOR_SCHEMA(SampleOp)
.SetDomain(kMSDomain)
@ -567,7 +695,31 @@ if the input is 8 bits or in 64 bits if the input is 16 bits.)DOC")
"group",
"number of groups input channels and output channels are divided into. default is 1.",
AttributeProto::INT,
static_cast<int64_t>(1));
static_cast<int64_t>(1))
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
auto x_type = ctx.getInputType(0);
auto w_type = ctx.getInputType(3);
auto y_type = ctx.getOutputType(0);
if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
fail_type_inference(
"inputs are expected to have tensor type and output type should not be null.");
}
if (ONNX_NAMESPACE::TensorProto::UINT8 == x_type->tensor_type().elem_type() &&
ONNX_NAMESPACE::TensorProto::UINT8 == w_type->tensor_type().elem_type()) {
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::UINT8);
} else {
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT8);
}
convPoolShapeInference(ctx, true, false, 0, 3);
});
ONNX_CONTRIB_OPERATOR_SCHEMA(ConvInteger)
.SetDomain(kMSDomain)
@ -660,7 +812,23 @@ The integer convolution operator consumes an input tensor, a filter, and a paddi
"group",
"number of groups input channels and output channels are divided into. default is 1.",
AttributeProto::INT,
static_cast<int64_t>(1));
static_cast<int64_t>(1))
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
auto x_type = ctx.getInputType(0);
auto w_type = ctx.getInputType(1);
auto y_type = ctx.getOutputType(0);
if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
fail_type_inference(
"inputs are expected to have tensor type and output type should not be null.");
}
// Right now we only support int32
y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT32);
convPoolShapeInference(ctx, true, false, 0, 1);
});
ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulInteger)
.SetDomain(kMSDomain)

View file

@ -118,7 +118,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(
col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(
Ydata, static_cast<int>(M / group_), static_cast<int>(output_image_size));
Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));
const std::tuple<> empty_pipeline = {};
gemmlowp::GemmContext gemm_context;

View file

@ -0,0 +1,193 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifdef _MSC_VER
#pragma warning(disable : 4244)
#pragma warning(disable : 4267)
#endif
#include "core/providers/cpu/nn/qlinearconv.h"

#include <cmath>
#include <cstring>

#include "core/util/math.h"
#include "core/util/math_cpuonly.h"
namespace onnxruntime {
namespace contrib {
// Runs the quantized convolution for one node invocation.
//
// Inputs read from the context, by index:
//   0: X            3: W
//   1: input scale  2: input zero point
//   4: filter scale 5: filter zero point
//   6: result scale 7: result zero point
//   8: bias (only read when the node defines exactly 9 inputs)
// X, W, Y and the zero points are accessed as uint8_t, the scales as float and
// the bias as int32_t. Output 0 (Y) is allocated here with shape
// {N, M, <inferred dims>}.
//
// Returns Status::OK() on success, or the first error produced by
// ValidateInputShape / ComputeKernelShape / InferOutputShape /
// GetTempSpaceAllocator.
Status QLinearConv::Compute(OpKernelContext* context) const {
const Tensor* X = context->Input<Tensor>(0);
const Tensor* W = context->Input<Tensor>(3);
// validate scale and zero points
auto input_scale = context->Input<Tensor>(1);
auto input_offset = context->Input<Tensor>(2);
ScaleAndZeropointPairValidationHelper(input_scale, input_offset);
auto filter_scale = context->Input<Tensor>(4);
auto filter_offset = context->Input<Tensor>(5);
ScaleAndZeropointPairValidationHelper(filter_scale, filter_offset);
auto result_scale = context->Input<Tensor>(6);
auto result_offset = context->Input<Tensor>(7);
ScaleAndZeropointPairValidationHelper(result_scale, result_offset);
// Only the first element of each scale / zero point tensor is used.
auto input_scale_data = *(input_scale->template Data<float>());
auto filter_scale_data = *(filter_scale->template Data<float>());
auto result_scale_data = *(result_scale->template Data<float>());
auto input_offset_data = *(input_offset->template Data<uint8_t>());
auto filter_offset_data = *(filter_offset->template Data<uint8_t>());
auto result_offset_data = *(result_offset->template Data<uint8_t>());
// Combined rescale factor, converted to an integer multiplier plus a right
// shift for the gemmlowp output pipeline built further down.
const float real_multiplier = (input_scale_data * filter_scale_data) / result_scale_data;
int32_t integer_multiplier;
int right_shift;
QuantizeMultiplier(real_multiplier, &integer_multiplier, &right_shift);
// The bias is looked up only when the node declares exactly nine inputs.
size_t num_inputs = OpKernel::Node().InputDefs().size();
const Tensor* bias = nullptr;
if (num_inputs == 9) {
bias = context->Input<Tensor>(8);
}
// N, C: first two dimensions of X; M: first dimension of W
// (presumably batch size, input channels and output channels — the Im2colNd
// call below is instantiated with StorageOrder::NCHW; TODO confirm).
const int64_t N = X->Shape()[0];
const int64_t C = X->Shape()[1];
const int64_t M = W->Shape()[0];
ORT_RETURN_IF_ERROR(ValidateInputShape(X, W));
std::vector<int64_t> kernel_shape;
ORT_RETURN_IF_ERROR(ComputeKernelShape(W->Shape(), kernel_shape));
// Empty pads_ / dilations_ / strides_ members fall back to 0 / 1 / 1 per
// kernel dimension.
std::vector<int64_t> pads(pads_);
if (pads.empty()) {
pads.resize(kernel_shape.size() * 2, 0);
}
std::vector<int64_t> dilations(dilations_);
if (dilations.empty()) {
dilations.resize(kernel_shape.size(), 1);
}
std::vector<int64_t> strides(strides_);
if (strides.empty()) {
strides.resize(kernel_shape.size(), 1);
}
// Y_dims starts as {N, M}; InferOutputShape receives it (and pads) by pointer
// and the result is used as the full output shape.
std::vector<int64_t> Y_dims;
Y_dims.insert(Y_dims.begin(), {N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
TensorShape output_shape = Y->Shape().Slice(2);
AllocatorPtr alloc;
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
const uint8_t* Xdata = X->template Data<uint8_t>();
uint8_t* Ydata = Y->template MutableData<uint8_t>();
const int64_t input_image_size = input_shape.Size();
const int64_t output_image_size = output_shape.Size();
const int64_t kernel_size = TensorShape(kernel_shape).Size();
// Element counts covered by one group: X_offset and Y_offset are per image,
// W_offset is over the whole filter tensor.
const int64_t X_offset = C / group_ * input_image_size;
const int64_t Y_offset = Y->Shape().Size() / Y->Shape()[0] / group_;
const int64_t W_offset = W->Shape().Size() / group_;
const int64_t kernel_dim = C / group_ * kernel_size;
const int64_t col_buffer_size = kernel_dim * output_image_size;
const int bias_offset = static_cast<int>(M / group_);
// Scratch buffer for the im2col result of one (image, group) pair; it is
// owned by col_buffer together with a BufferDeleter bound to alloc.
auto col_data = alloc->Alloc(sizeof(uint8_t) * col_buffer_size);
BufferUniquePtr col_buffer(col_data, BufferDeleter(alloc));
uint8_t* col_buffer_data = static_cast<uint8_t*>(col_buffer.get());
TensorShape image_shape = X->Shape().Slice(1);
std::vector<int64_t> col_buffer_shape{kernel_dim};
col_buffer_shape.insert(col_buffer_shape.end(), output_shape.GetDims().begin(),
output_shape.GetDims().end());
for (int image_id = 0; image_id < N; ++image_id) {
for (int group_id = 0; group_id < group_; ++group_id) {
// Unfold the current group's slice of the image into col_buffer_data.
// NOTE(review): the trailing input_offset_data argument is presumably the
// value written for padded positions — confirm against Im2colNd.
math::Im2colNd<uint8_t, CPUMathUtil, StorageOrder::NCHW>()(
Xdata + group_id * X_offset,
image_shape.GetDims().data(),
col_buffer_shape.data(),
C * input_image_size,
col_buffer_size,
kernel_shape.data(),
strides.data(),
dilations.data(),
pads.data(),
static_cast<int>(kernel_shape.size()),
col_buffer_data,
&CPUMathUtil::Instance(),
false,
input_offset_data);
const uint8_t* filter_data_as_uint8 = W->template Data<uint8_t>() + group_id * W_offset;
// Row-major views for the GEMM:
//   lhs    = filter      (M / group_  x  kernel_dim)
//   rhs    = col buffer  (kernel_dim  x  output_image_size)
//   result = Y slice     (M / group_  x  output_image_size)
static const gemmlowp::MapOrder MatOrder = gemmlowp::MapOrder::RowMajor;
gemmlowp::MatrixMap<const std::uint8_t, MatOrder> lhs(
filter_data_as_uint8, static_cast<int>(M / group_), static_cast<int>(kernel_dim));
gemmlowp::MatrixMap<const std::uint8_t, MatOrder> rhs(
col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
gemmlowp::MatrixMap<std::uint8_t, MatOrder> result(
Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));
// TODO: worker thread pool needs to be handled.
gemmlowp::GemmContext gemm_context;
// The negated zero points are passed as the lhs / rhs offsets; the output
// pipeline differs only in whether a bias stage is included.
if (bias == nullptr) {
auto output_pipeline = MakeOutputPipelineWithOutBias(result_offset_data,
integer_multiplier, right_shift);
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
gemmlowp::DefaultL8R8BitDepthParams>(
&gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
output_pipeline);
} else {
// The bias pointer is advanced by M / group_ elements per group.
auto output_pipeline = MakeOutputPipelineWithBias(bias->template Data<int32_t>() + group_id * bias_offset,
static_cast<int>(M / group_), result_offset_data, integer_multiplier, right_shift);
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
gemmlowp::DefaultL8R8BitDepthParams>(
&gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
output_pipeline);
}
}
// Step both data pointers past the image that was just processed.
Xdata += X_offset * group_;
Ydata += Y_offset * group_;
}
return Status::OK();
}
// Splits a floating point multiplier into a 32-bit fixed point multiplier and
// a right shift.
//
//   fp_multiplier      - multiplier to decompose.
//   integer_multiplier - receives round(m * 2^31), where m is fp_multiplier's
//                        mantissa re-scaled into [0.5, 1).
//   right_shift        - receives 126 minus the value of the bits above the
//                        mantissa (bit 23 and up) of fp_multiplier.
//
// NOTE(review): the bit manipulation assumes `float` is a 32-bit IEEE-754
// value; for a negative input the sign bit is part of the value subtracted
// from 126, exactly as in the previous implementation.
void QLinearConv::QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  // Copy the bit pattern instead of dereferencing a reinterpret_cast'ed
  // pointer: reading a float object through a uint32_t lvalue (and vice
  // versa) violates the strict aliasing rules.
  uint32_t fp_as_bits = 0;
  std::memcpy(&fp_as_bits, &fp_multiplier, sizeof(fp_as_bits));
  const uint32_t current_exponent = fp_as_bits >> 23;
  // bring multiplier in [.5,1) range and calculate the shift
  const uint32_t bumped_multiplier_as_bits =
      (fp_as_bits & UINT32_C(0x007fffff)) | UINT32_C(0x3f000000);
  float bumped_multiplier = 0.0f;
  std::memcpy(&bumped_multiplier, &bumped_multiplier_as_bits, sizeof(bumped_multiplier));
  // current_exponent is at most 9 bits wide, so the subtraction is done in
  // signed arithmetic rather than relying on unsigned wrap-around followed by
  // a narrowing conversion.
  const int shift = 126 - static_cast<int>(current_exponent);
  // convert to fixed point number
  const std::int64_t int_multiplier =
      static_cast<std::int64_t>(std::round(bumped_multiplier * (1ll << 31)));
  *integer_multiplier = static_cast<int32_t>(int_multiplier);
  *right_shift = shift;
}
// Enforces that both `scale` and `zeropoint` hold exactly one value: each must
// be either a rank-0 tensor or a rank-1 tensor with a single element.
// Violations are reported through ORT_ENFORCE.
void QLinearConv::ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const {
  // For the rank-1 case test the element count; GetDims().size() is just the
  // rank again and would accept a vector of any length.
  ORT_ENFORCE(scale->Shape().NumDimensions() == 0 ||
                  (scale->Shape().NumDimensions() == 1 && scale->Shape().Size() == 1),
              "scale must be a scalar");
  ORT_ENFORCE(zeropoint->Shape().NumDimensions() == 0 ||
                  (zeropoint->Shape().NumDimensions() == 1 && zeropoint->Shape().Size() == 1),
              "zeropoint must be a scalar");
}
// Kernel definition for QLinearConv: domain kMSDomain, version 1, CPU
// execution provider. The type parameters T1, T2 and T3 are each constrained
// to uint8_t tensors.
// NOTE(review): which inputs/outputs T1..T3 are bound to is defined by the
// operator schema, not here — confirm against the schema.
ONNX_OPERATOR_KERNEL_EX(
QLinearConv,
kMSDomain,
1,
kCpuExecutionProvider,
KernelDefBuilder()
.TypeConstraint("T1", DataTypeImpl::GetTensorType<uint8_t>())
.TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>())
.TypeConstraint("T3", DataTypeImpl::GetTensorType<uint8_t>()),
QLinearConv);
} // namespace contrib
} // namespace onnxruntime

View file

@ -0,0 +1,57 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/providers/cpu/nn/conv_base.h"
#include "core/util/gemmlowp_common_wrapper.h"
namespace onnxruntime {
namespace contrib {
// CPU kernel class for the QLinearConv contrib operator. It derives from
// OpKernel and from ConvBase; both bases are constructed from the same
// OpKernelInfo.
class QLinearConv : public OpKernel, public ConvBase {
public:
explicit QLinearConv(const OpKernelInfo& info) : OpKernel(info), ConvBase(info) {
}
// Kernel entry point.
Status Compute(OpKernelContext* context) const override;
// Decomposes fp_multiplier into a fixed point multiplier and a right shift,
// written to *integer_multiplier and *right_shift.
void QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const;
// Enforces that scale and zeropoint have scalar-like shapes (rank 0 or
// rank 1); see the definition for the exact rule.
void ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const;
};
typedef gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Col> ColVectorMap;
inline std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
gemmlowp::OutputStageSaturatingCastToUint8>
MakeOutputPipelineWithBias(const int32_t* bias,
int rows,
std::int32_t result_offset,
std::int32_t result_mult_int,
std::int32_t result_shift) {
ColVectorMap bias_vector(bias, rows);
gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
bias_addition_stage.bias_vector = bias_vector;
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage;
quantize_down_stage.result_offset_after_shift = result_offset;
quantize_down_stage.result_fixedpoint_multiplier = result_mult_int;
quantize_down_stage.result_shift = result_shift;
gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
return std::make_tuple(bias_addition_stage, quantize_down_stage, saturating_cast_stage);
}
inline std::tuple<gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
gemmlowp::OutputStageSaturatingCastToUint8>
MakeOutputPipelineWithOutBias(std::int32_t result_offset,
std::int32_t result_mult_int,
std::int32_t result_shift) {
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage;
quantize_down_stage.result_offset_after_shift = result_offset;
quantize_down_stage.result_fixedpoint_multiplier = result_mult_int;
quantize_down_stage.result_shift = result_shift;
gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
return std::make_tuple(quantize_down_stage, saturating_cast_stage);
}
}
} // namespace onnxruntime

View file

@ -0,0 +1,180 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"
using namespace std;
namespace onnxruntime {
namespace test {
namespace {
// Writes the smallest and largest element of `vec` to *min and *max.
// An empty vector has no extrema; both outputs are then left at 0 instead of
// dereferencing the end iterator.
void FindMinMax(const std::vector<float>& vec, float* min,
                float* max) {
  *min = *max = 0;
  if (vec.empty()) {
    return;
  }
  *min = *std::min_element(vec.begin(), vec.end());
  *max = *std::max_element(vec.begin(), vec.end());
}
// uses quantization range 0-255
// Derives quantization parameters for the quantized range [0, 255].
//
//   min, max   - observed value range; it is widened to include 0 first.
//   scale      - receives (max - min) / 255.
//   zero_point - receives the quantized value that represents 0.0, rounded
//                and clamped to [0, 255].
//
// When the widened range is empty (min == max == 0) the scale would be 0 and
// the zero point computation would evaluate 0 / 0. In that case scale is set
// to 1 and zero_point to 0, so callers never divide by a zero scale.
void FindScaleAndZeroPoint(float min, float max, float* scale, uint8_t* zero_point) {
  min = std::min(min, 0.f);
  max = std::max(max, 0.f);
  const float qmin = 0;
  const float qmax = 255;
  if (max == min) {
    // both bounds are 0 here because of the clamps above
    *scale = 1.f;
    *zero_point = 0;
    return;
  }
  *scale = (max - min) / (qmax - qmin);
  const auto initial_zero_point = qmin - min / *scale;
  *zero_point = static_cast<uint8_t>(std::round(std::max(0.f, std::min(255.f, initial_zero_point))));
}
// Quantizes every element of `input` as
//   clamp(round(value / scale) + zero_point, 0, 255)
// and stores the result at the same index of *input_quantized.
// The output vector is grown when it is shorter than `input`, so the writes
// below can never go out of bounds; a longer output keeps its extra elements.
void Quantize(float scale, uint8_t zero_point,
              const std::vector<float>& input, std::vector<uint8_t>* input_quantized) {
  if (input_quantized->size() < input.size()) {
    input_quantized->resize(input.size());
  }
  for (size_t i = 0; i < input.size(); i++) {
    const float clamped_val = std::max(0.f, std::min(255.f, std::round(input[i] / scale) + zero_point));
    (*input_quantized)[i] = static_cast<uint8_t>(clamped_val);
  }
}
// 2-D QLinearConv without bias and without attributes: a {1, 1, 7, 7} input
// and a {1, 1, 1, 1} filter. The float reference data is quantized to uint8
// with parameters derived from each tensor's min/max, and the kernel output is
// compared against the quantized reference output.
TEST(ConvTest, QLinearConv2DTest) {
OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);
// Float input data, 49 values.
vector<float> X = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f, -0.39421093463897705f,
0.2626858949661255f, 0.13414543867111206f, -0.27184486389160156f, -0.43028733134269714f,
-0.26825493574142456f, 0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f,
-0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f, 0.4043201804161072f,
0.07795023918151855f, 0.326981782913208f, 0.13114392757415771f, -0.4416425824165344f,
0.12446999549865723f, 0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f,
0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f, 0.3877097964286804f,
0.20812749862670898f, -0.34297940135002136f, -0.029246658086776733f, -0.20483523607254028f,
-0.19244328141212463f, -0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f,
0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f, -0.3349453806877136f,
-0.34091079235076904f, 0.006497859954833984f, 0.4537564516067505f, 0.08006560802459717f,
-0.14788749814033508f, 0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f,
0.42619407176971436f};
vector<int64_t> X_shape = {1, 1, 7, 7};
// Single filter weight.
vector<float> W = {-0.4406261742115021f};
vector<int64_t> W_shape = {1, 1, 1, 1};
// Float reference output (presumably each X element times the single weight).
auto expected_vals = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f, 0.17369966208934784f,
-0.11574628204107285f, -0.05910799279808998f, 0.1197819635272026f, 0.18959586322307587f,
0.1182001456618309f, -0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f,
0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f, -0.17815405130386353f,
-0.034346915781497955f, -0.14407673478126526f, -0.05778544768691063f, 0.19459928572177887f,
-0.05484473705291748f, -0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f,
-0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f, -0.17083507776260376f,
-0.09170642495155334f, 0.1511256992816925f, 0.012886842712759972f, 0.09025576710700989f,
0.08479554951190948f, 0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f,
-0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f, 0.1475857049226761f,
0.15021422505378723f, -0.0028631272725760937f, -0.19993697106838226f, -0.03527900204062462f,
0.06516310572624207f, -0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f,
-0.18779225647449493f};
vector<int64_t> Y_shape = {1, 1, 7, 7};
// Calculate quantization params and quantize the inputs and expected output
float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
FindMinMax(X, &lhs_min, &lhs_max);
FindMinMax(W, &rhs_min, &rhs_max);
FindMinMax(expected_vals, &result_min, &result_max);
float lhs_scale, rhs_scale, result_scale;
uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
// The destination vectors are pre-sized because Quantize writes by index.
vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()), result_quantized(expected_vals.size());
Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
// Eight inputs (no bias); scales and zero points are passed with an empty
// shape.
test.AddInput<uint8_t>("x", X_shape, x_quantized);
test.AddInput<float>("x_scale", {}, {lhs_scale});
test.AddInput<uint8_t>("x_zero_point", {}, {lhs_zero_point});
test.AddInput<uint8_t>("w", W_shape, w_quantized);
test.AddInput<float>("w_scale", {}, {rhs_scale});
test.AddInput<uint8_t>("w_zero_point", {}, {rhs_zero_point});
test.AddInput<float>("y_scale", {}, {result_scale});
test.AddInput<uint8_t>("y_zero_point", {}, {result_zero_point});
test.AddOutput<uint8_t>("y", Y_shape, result_quantized);
test.Run();
}
// 3-D QLinearConv without bias: a {1, 1, 4, 4, 4} input and a
// {1, 1, 1, 1, 1} filter, run with the "pads" and "strides" attributes set.
// The float reference data is quantized to uint8 with parameters derived from
// each tensor's min/max, and the kernel output is compared against the
// quantized reference output.
TEST(ConvTest, QLinearConv3DTest) {
OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);
// Float input data, 64 values.
vector<float> X = {0.010772407054901123f, -0.43806642293930054f, 0.455391526222229f, -0.28657248616218567f,
0.45676887035369873f, -0.0320507287979126f, 0.4229400157928467f, -0.18730869889259338f,
-0.45851585268974304f, 0.042054951190948486f, -0.13332295417785645f, -0.25374430418014526f,
-0.23845627903938293f, 0.12214112281799316f, -0.1778157651424408f, 0.1891845464706421f,
0.37962496280670166f, -0.033982306718826294f, 0.12737131118774414f, -0.040284961462020874f,
0.46427029371261597f, -0.22687292098999023f, 0.17398333549499512f, -0.3014046251773834f,
-0.4043419063091278f, -0.33206477761268616f, 0.04655301570892334f, -0.4947906732559204f,
0.0755157470703125f, 0.1173025369644165f, 0.47043120861053467f, 0.4824737310409546f,
-0.37734976410865784f, -0.056491583585739136f, -0.10790631175041199f, 0.043476223945617676f,
0.24469023942947388f, -0.4100031852722168f, 0.0616222620010376f, 0.2296960949897766f,
0.27883386611938477f, 0.08150351047515869f, 0.2453773021697998f, 0.08250969648361206f,
-0.1471814215183258f, -0.43011274933815f, 0.027180075645446777f, 0.3605625033378601f,
0.24954384565353394f, -0.22505927085876465f, -0.36272895336151123f, -0.47674262523651123f,
0.11275297403335571f, 0.49773406982421875f, 0.2686365246772766f, 0.025525271892547607f,
-0.3037869930267334f, 0.41126757860183716f, 0.36149072647094727f, 0.00883406400680542f,
-0.07959523797035217f, 0.3601323366165161f, 0.17322391271591187f, -0.012007325887680054f};
vector<int64_t> X_shape = {1, 1, 4, 4, 4};
// Single filter weight.
vector<float> W = {0.32824617624282837f};
vector<int64_t> W_shape = {1, 1, 1, 1, 1};
vector<int64_t> Y_shape = {1, 1, 4, 4, 4};
// Float reference output; most entries are 0 (presumably the positions that
// fall entirely into the padding).
auto expected_vals = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0035360013134777546f, 0.14948052167892456f, 0.0f,
0.0f, -0.15050607919692993f, -0.043762750923633575f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -0.12386361509561539f, -0.03541983291506767f, 0.0f,
0.0f, 0.09152615070343018f, 0.08054415881633759f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
// Six pad values and three strides, all set to 2.
vector<int64_t> pads = {2, 2, 2, 2, 2, 2};
vector<int64_t>strides = {2, 2, 2};
// Calculate quantization params and quantize the inputs and expected output
float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
FindMinMax(X, &lhs_min, &lhs_max);
FindMinMax(W, &rhs_min, &rhs_max);
FindMinMax(expected_vals, &result_min, &result_max);
float lhs_scale, rhs_scale, result_scale;
uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
// The destination vectors are pre-sized because Quantize writes by index.
vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()), result_quantized(expected_vals.size());
Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
test.AddAttribute("pads", pads);
test.AddAttribute("strides", strides);
// Eight inputs (no bias); scales and zero points are passed with an empty
// shape.
test.AddInput<uint8_t>("x", X_shape, x_quantized);
test.AddInput<float>("x_scale", {}, {lhs_scale});
test.AddInput<uint8_t>("x_zero_point", {}, {lhs_zero_point});
test.AddInput<uint8_t>("w", W_shape, w_quantized);
test.AddInput<float>("w_scale", {}, {rhs_scale});
test.AddInput<uint8_t>("w_zero_point", {}, {rhs_zero_point});
test.AddInput<float>("y_scale", {}, {result_scale});
test.AddInput<uint8_t>("y_zero_point", {}, {result_zero_point});
test.AddOutput<uint8_t>("y", Y_shape, result_quantized);
test.Run();
}
} // namespace
} // namespace test
} // namespace onnxruntime