QLinearConv (#370)

* First draft QLinearConv * Add shape inference for quantized conv operators * adding test cases for QLinearConv * plus minor corrections
2026-06-30 03:37:44 +00:00 · 2019-01-28 23:13:47 -08:00 · 2019-01-28 23:13:47 -08:00 · b92bc99861
commit b92bc99861
parent 5ef4c90f1d
6 changed files with 603 additions and 3 deletions
--- a/onnxruntime/contrib_ops/contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/contrib_kernels.cc
@ -28,6 +28,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMu
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv);

 void RegisterContribKernels(KernelRegistry& kernel_registry) {
  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp)>());
@ -54,6 +55,7 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) {
  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ConvInteger)>());
  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ROIAlign)>());
  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, ROIAlign)>());
+  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearConv)>());
 }

 }  // namespace contrib
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@ -91,6 +91,134 @@ void matmulShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int input1Idx,
  *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = resultShape;
 }

+// Infers the output shape of a convolution / pooling style operator and
+// writes it to output 0 (and copies it to output 1 when the node has a
+// second output).
+//
+//   use_dilation         - honour the "dilations" attribute; otherwise every
+//                          spatial axis uses dilation 1.
+//   require_kernel_shape - when true the "kernel_shape" attribute is
+//                          mandatory and the channel dim is copied from the
+//                          data input; when false a missing attribute is
+//                          replaced by dims [2, rank) of the weight input and
+//                          the channel dim is dim 0 of the weight input.
+//   input1Idx            - index of the data input; dim 0 is the batch axis,
+//                          dim 1 the channels, the rest are spatial axes.
+//   input2Idx            - index of the weight input.
+//
+// Returns without touching the output when a needed input shape is unknown,
+// when "auto_pad" is present or when "group" is not 1. Malformed attributes
+// or shapes are reported through fail_shape_inference.
+void convPoolShapeInference(
+    ONNX_NAMESPACE::InferenceContext& ctx,
+    bool use_dilation,
+    bool require_kernel_shape,
+    int input1Idx, int input2Idx) {
+  if (!hasInputShape(ctx, input1Idx)) {
+    return;
+  }
+
+  // If the kernel shape may come from the weight input (and not the
+  // attribute) we need the shape of that specific input. hasNInputShapes
+  // would only look at the inputs in front of input2Idx, never at it.
+  if (!require_kernel_shape && !hasInputShape(ctx, input2Idx)) {
+    return;
+  }
+
+  // don't bother with legacy auto_pad for now
+  if (ctx.getAttribute("auto_pad")) {
+    return;
+  }
+
+  const auto& input_shape = ctx.getInputType(input1Idx)->tensor_type().shape();
+  if (input_shape.dim_size() < 2) {
+    fail_shape_inference("Input tensor must have atleast 2 dimensions");
+  }
+
+  // first dim is the batch axis and the next is the number of channels.
+  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
+
+  // Pooling operations don't support dilation, only Conv. For
+  // simplicity of the code, we just treat them as having all-1s
+  // dilation.
+  std::vector<int64_t> dilations;
+  if (use_dilation && getRepeatedAttribute(ctx, "dilations", dilations)) {
+    if (dilations.size() != n_input_dims) {
+      fail_shape_inference("Attribute dilations has incorrect size");
+    }
+  } else {
+    dilations.assign(n_input_dims, 1);
+  }
+
+  int64_t groups = getAttribute(ctx, "group", 1);
+  if (groups != 1) {
+    return;  // we don't handle the group case.
+  }
+
+  std::vector<int64_t> pads;
+  if (getRepeatedAttribute(ctx, "pads", pads)) {
+    if (pads.size() != n_input_dims * 2) {
+      fail_shape_inference("Attribute pads has incorrect size");
+    }
+  } else {
+    pads.assign(n_input_dims * 2, 0);
+  }
+
+  std::vector<int64_t> strides;
+  if (getRepeatedAttribute(ctx, "strides", strides)) {
+    if (strides.size() != n_input_dims) {
+      fail_shape_inference("Attribute strides has incorrect size");
+    }
+  } else {
+    strides.assign(n_input_dims, 1);
+  }
+
+  std::vector<int64_t> kernel_shape;
+  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
+    if (kernel_shape.size() != n_input_dims) {
+      fail_shape_inference("Attribute kernel_shape has incorrect size");
+    }
+  } else if (require_kernel_shape) {
+    fail_shape_inference("Attribute kernel_shape must be specified");
+  } else {
+    // Fall back to the spatial dims of the weight input.
+    const auto& weight_shape = getInputShape(ctx, input2Idx);
+    for (int i = 2; i < weight_shape.dim_size(); ++i) {
+      if (!weight_shape.dim(i).has_dim_value()) {
+        return;
+      }
+      kernel_shape.push_back(weight_shape.dim(i).dim_value());
+    }
+    // The loop further down indexes input_shape and pads by kernel axis, so
+    // the weight input must have as many spatial axes as the data input.
+    if (kernel_shape.size() != n_input_dims) {
+      fail_shape_inference("Second input tensor has wrong dimension");
+    }
+  }
+
+  auto output_shape =
+      ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
+
+  if (require_kernel_shape) {
+    // add the first two dimensions from the input.
+    *output_shape->add_dim() = input_shape.dim(0);
+    *output_shape->add_dim() = input_shape.dim(1);
+  } else {
+    *output_shape->add_dim() = input_shape.dim(0);
+    // The channel dim comes from the weight input, whose position differs
+    // between operators, so it has to be looked up through input2Idx.
+    const auto& weight_shape = getInputShape(ctx, input2Idx);
+    if (weight_shape.dim_size() < 1) {
+      fail_shape_inference("Second input tensor has wrong dimension");
+    }
+    *output_shape->add_dim() = weight_shape.dim(0);
+  }
+
+  int kernel_shape_size = static_cast<int>(kernel_shape.size());
+  for (int i = 0; i < kernel_shape_size; ++i) {
+    // The output dim is always added; it stays without a value when the
+    // matching input dim is unknown.
+    auto newdim = output_shape->add_dim();
+    if (!input_shape.dim(2 + i).has_dim_value()) {
+      continue;
+    }
+    // how big is the input, including padding
+    int64_t effective_input_size = input_shape.dim(2 + i).dim_value();
+    effective_input_size += pads[i];
+    effective_input_size += pads[i + kernel_shape_size];
+
+    int64_t effective_kernel_size = kernel_shape[i];
+    // accounting for dilation, how big is the kernel in this dimension
+    effective_kernel_size = (effective_kernel_size - 1) * dilations[i] + 1;
+
+    // how many times we can move the kernel from its initial position, based
+    // on the stride
+    int64_t strided_kernel_positions =
+        (effective_input_size - effective_kernel_size) / strides[i];
+
+    // add in the initial position
+    newdim->set_dim_value(1 + strided_kernel_positions);
+  }
+
+  if (ctx.getNumOutputs() > 1) {
+    // MaxPool with two outputs case.
+    auto second_output_shape =
+        ctx.getOutputType(1)->mutable_tensor_type()->mutable_shape();
+    second_output_shape->CopyFrom(*output_shape);
+  }
+}
+
 void RegisterContribSchemas() {
  ONNX_CONTRIB_OPERATOR_SCHEMA(SampleOp)
      .SetDomain(kMSDomain)
@ -567,7 +695,31 @@ if the input is 8 bits or in 64 bits if the input is 16 bits.)DOC")
          "group",
          "number of groups input channels and output channels are divided into. default is 1.",
          AttributeProto::INT,
-          static_cast<int64_t>(1));
+          static_cast<int64_t>(1))
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        auto x_type = ctx.getInputType(0);
+        auto w_type = ctx.getInputType(3);
+        auto y_type = ctx.getOutputType(0);
+        if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
+            x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
+            w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
+          fail_type_inference(
+              "inputs are expected to have tensor type and output type should not be null.");
+        }
+
+        if (ONNX_NAMESPACE::TensorProto::UINT8 == x_type->tensor_type().elem_type() &&
+            ONNX_NAMESPACE::TensorProto::UINT8 == w_type->tensor_type().elem_type()) {
+          y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::UINT8);
+        } else {
+          y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT8);
+        }
+
+        convPoolShapeInference(ctx, true, false, 0, 3);
+      });
+      
+    
+          
+

  ONNX_CONTRIB_OPERATOR_SCHEMA(ConvInteger)
      .SetDomain(kMSDomain)
@ -660,7 +812,23 @@ The integer convolution operator consumes an input tensor, a filter, and a paddi
          "group",
          "number of groups input channels and output channels are divided into. default is 1.",
          AttributeProto::INT,
-          static_cast<int64_t>(1));
+          static_cast<int64_t>(1))
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        auto x_type = ctx.getInputType(0);
+        auto w_type = ctx.getInputType(1);
+        auto y_type = ctx.getOutputType(0);
+        if (nullptr == x_type || nullptr == w_type || nullptr == y_type ||
+            x_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType ||
+            w_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
+          fail_type_inference(
+              "inputs are expected to have tensor type and output type should not be null.");
+        }
+
+        // Right now we only support int32
+        y_type->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto::INT32);
+
+        convPoolShapeInference(ctx, true, false, 0, 1);
+      });

  ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulInteger)
      .SetDomain(kMSDomain)
--- a/onnxruntime/core/providers/cpu/nn/conv_integer.cc
+++ b/onnxruntime/core/providers/cpu/nn/conv_integer.cc
@ -118,7 +118,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
      gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(
          col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
      gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(
-          Ydata, static_cast<int>(M / group_), static_cast<int>(output_image_size));
+          Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));
      const std::tuple<> empty_pipeline = {};

      gemmlowp::GemmContext gemm_context;
--- a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc
+++ b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc
@ -0,0 +1,193 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4244)
+#pragma warning(disable : 4267)
+#endif
+
+#include "core/providers/cpu/nn/qlinearconv.h"
+
+#include <cstring>
+
+#include "core/util/math.h"
+#include "core/util/math_cpuonly.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+// Runs QLinearConv for one node invocation.
+//
+// Inputs as read below: 0 = X, 1/2 = X scale and zero point, 3 = W,
+// 4/5 = W scale and zero point, 6/7 = output scale and zero point, and an
+// optional bias at index 8 (read as int32). X, W and the three zero points
+// are read as uint8; the scales are read as float.
+//
+// For every image and group the input is unrolled with Im2colNd into a
+// column buffer and multiplied with the filter through gemmlowp; the GEMM
+// output pipeline (with or without a bias stage) writes uint8 results
+// directly into output 0.
+//
+// Returns the first failing Status from shape validation / inference or from
+// acquiring the temp allocator, Status::OK() otherwise.
+Status QLinearConv::Compute(OpKernelContext* context) const {
+  const Tensor* X = context->Input<Tensor>(0);
+  const Tensor* W = context->Input<Tensor>(3);
+
+  // validate scale and zero points
+  auto input_scale = context->Input<Tensor>(1);
+  auto input_offset = context->Input<Tensor>(2);
+  ScaleAndZeropointPairValidationHelper(input_scale, input_offset);
+  auto filter_scale = context->Input<Tensor>(4);
+  auto filter_offset = context->Input<Tensor>(5);
+  ScaleAndZeropointPairValidationHelper(filter_scale, filter_offset);
+  auto result_scale = context->Input<Tensor>(6);
+  auto result_offset = context->Input<Tensor>(7);
+  ScaleAndZeropointPairValidationHelper(result_scale, result_offset);
+
+  // Only the first element of each scale / zero point tensor is used.
+  auto input_scale_data = *(input_scale->template Data<float>());
+  auto filter_scale_data = *(filter_scale->template Data<float>());
+  auto result_scale_data = *(result_scale->template Data<float>());
+
+  auto input_offset_data = *(input_offset->template Data<uint8_t>());
+  auto filter_offset_data = *(filter_offset->template Data<uint8_t>());
+  auto result_offset_data = *(result_offset->template Data<uint8_t>());
+
+  // Combined rescale factor; converted to an integer multiplier plus a right
+  // shift so it can be handed to the gemmlowp output pipeline.
+  const float real_multiplier = (input_scale_data * filter_scale_data) / result_scale_data;
+  int32_t integer_multiplier;
+  int right_shift;
+  QuantizeMultiplier(real_multiplier, &integer_multiplier, &right_shift);
+
+  // The bias is only read when the node declares all nine inputs.
+  size_t num_inputs = OpKernel::Node().InputDefs().size();
+  const Tensor* bias = nullptr;
+  if (num_inputs == 9) {
+    bias = context->Input<Tensor>(8);
+  }  
+
+  // NOTE(review): dims 0 and 1 of X and dim 0 of W are indexed before
+  // ValidateInputShape runs - confirm the ranks are guaranteed elsewhere.
+  const int64_t N = X->Shape()[0];
+  const int64_t C = X->Shape()[1];
+  const int64_t M = W->Shape()[0];
+  ORT_RETURN_IF_ERROR(ValidateInputShape(X, W));
+
+  std::vector<int64_t> kernel_shape;
+  ORT_RETURN_IF_ERROR(ComputeKernelShape(W->Shape(), kernel_shape));
+
+  // Unset attributes fall back to zero padding, unit dilation and unit stride.
+  std::vector<int64_t> pads(pads_);
+  if (pads.empty()) {
+    pads.resize(kernel_shape.size() * 2, 0);
+  }
+  std::vector<int64_t> dilations(dilations_);
+  if (dilations.empty()) {
+    dilations.resize(kernel_shape.size(), 1);
+  }
+  std::vector<int64_t> strides(strides_);
+  if (strides.empty()) {
+    strides.resize(kernel_shape.size(), 1);
+  }
+
+  // Y starts as {N, M}; InferOutputShape receives Y_dims and pads by pointer
+  // (presumably appending the spatial output dims - confirm in ConvBase).
+  std::vector<int64_t> Y_dims;
+  Y_dims.insert(Y_dims.begin(), {N, M});
+  TensorShape input_shape = X->Shape().Slice(2);
+  ORT_RETURN_IF_ERROR(InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
+  Tensor* Y = context->Output(0, TensorShape(Y_dims));
+  TensorShape output_shape = Y->Shape().Slice(2);
+
+  AllocatorPtr alloc;
+  ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
+
+  const uint8_t* Xdata = X->template Data<uint8_t>();
+  uint8_t* Ydata = Y->template MutableData<uint8_t>();
+
+  // X_offset, Y_offset and W_offset are the element counts of one group's
+  // slice of X, Y and W; they are used as per-group pointer strides below.
+  const int64_t input_image_size = input_shape.Size();
+  const int64_t output_image_size = output_shape.Size();
+  const int64_t kernel_size = TensorShape(kernel_shape).Size();
+  const int64_t X_offset = C / group_ * input_image_size;
+  const int64_t Y_offset = Y->Shape().Size() / Y->Shape()[0] / group_;
+  const int64_t W_offset = W->Shape().Size() / group_;  
+  const int64_t kernel_dim = C / group_ * kernel_size;
+  const int64_t col_buffer_size = kernel_dim * output_image_size;
+  const int bias_offset = static_cast<int>(M / group_);
+
+  // Scratch buffer for the im2col result; it is mapped below as a
+  // kernel_dim x output_image_size matrix. Released by BufferDeleter.
+  auto col_data = alloc->Alloc(sizeof(uint8_t) * col_buffer_size);
+  BufferUniquePtr col_buffer(col_data, BufferDeleter(alloc));
+  uint8_t* col_buffer_data = static_cast<uint8_t*>(col_buffer.get());
+
+  TensorShape image_shape = X->Shape().Slice(1);
+  std::vector<int64_t> col_buffer_shape{kernel_dim};
+  col_buffer_shape.insert(col_buffer_shape.end(), output_shape.GetDims().begin(),
+                          output_shape.GetDims().end());
+
+  for (int image_id = 0; image_id < N; ++image_id) {
+    for (int group_id = 0; group_id < group_; ++group_id) {
+      // The input zero point is passed as the last argument (presumably the
+      // value used for padded positions - confirm against Im2colNd).
+      math::Im2colNd<uint8_t, CPUMathUtil, StorageOrder::NCHW>()(
+          Xdata + group_id * X_offset,
+          image_shape.GetDims().data(),
+          col_buffer_shape.data(),
+          C * input_image_size,
+          col_buffer_size,
+          kernel_shape.data(),
+          strides.data(),
+          dilations.data(),
+          pads.data(),
+          static_cast<int>(kernel_shape.size()),
+          col_buffer_data,
+          &CPUMathUtil::Instance(),
+		  false,
+          input_offset_data);
+
+      // lhs = this group's filters, rhs = im2col buffer, result = this
+      // group's slice of Y for the current image; all row-major.
+      const uint8_t* filter_data_as_uint8 = W->template Data<uint8_t>() + group_id * W_offset;
+      static const gemmlowp::MapOrder MatOrder = gemmlowp::MapOrder::RowMajor;
+      gemmlowp::MatrixMap<const std::uint8_t, MatOrder> lhs(
+          filter_data_as_uint8, static_cast<int>(M / group_), static_cast<int>(kernel_dim));
+      gemmlowp::MatrixMap<const std::uint8_t, MatOrder> rhs(
+          col_buffer_data, static_cast<int>(kernel_dim), static_cast<int>(output_image_size));
+      gemmlowp::MatrixMap<std::uint8_t, MatOrder> result(
+          Ydata + group_id * Y_offset, static_cast<int>(M / group_), static_cast<int>(output_image_size));
+
+      // The zero points are passed negated as the lhs / rhs offsets
+      // (gemmlowp is expected to add these offsets to every entry - see the
+      // gemmlowp documentation).
+      // TODO: worker thread pool needs to be handled.
+      gemmlowp::GemmContext gemm_context;
+      if (bias == nullptr) {
+        auto output_pipeline = MakeOutputPipelineWithOutBias(result_offset_data, 
+            integer_multiplier, right_shift);
+        gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
+                                         gemmlowp::DefaultL8R8BitDepthParams>(
+            &gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
+            output_pipeline);        
+      } else {
+        // Each group uses its own M / group_ bias entries.
+        auto output_pipeline = MakeOutputPipelineWithBias(bias->template Data<int32_t>() + group_id * bias_offset, 
+            static_cast<int>(M / group_), result_offset_data, integer_multiplier, right_shift);
+        gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
+                                         gemmlowp::DefaultL8R8BitDepthParams>(
+            &gemm_context, lhs, rhs, &result, -filter_offset_data, -input_offset_data,
+            output_pipeline);
+      }      
+    }
+
+    // Step both data pointers past all groups of the current image.
+    Xdata += X_offset * group_;
+    Ydata += Y_offset * group_;
+  }
+
+  return Status::OK();
+}
+
+// Splits fp_multiplier into a fixed-point mantissa and a right shift so that
+//   fp_multiplier ~= (*integer_multiplier / 2^31) * 2^(-*right_shift).
+//
+//   fp_multiplier      - multiplier to decompose; the bit arithmetic below
+//                        assumes a positive value (the sign bit is not
+//                        masked out of the exponent).
+//   integer_multiplier - receives the mantissa, rescaled into [.5, 1) and
+//                        multiplied by 2^31.
+//   right_shift        - receives 126 minus the biased exponent field.
+void QLinearConv::QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const {
+  static_assert(sizeof(std::uint32_t) == sizeof(float), "float is expected to be 32 bits wide");
+
+  // Copy the bit pattern out with memcpy: reading a float through a
+  // reinterpret_cast'ed uint32_t pointer violates the strict aliasing rules.
+  std::uint32_t fp_as_bits = 0;
+  std::memcpy(&fp_as_bits, &fp_multiplier, sizeof(fp_as_bits));
+  const std::uint32_t current_exponent = fp_as_bits >> 23;
+
+  // bring multiplier in [.5,1) range and calculate the shift:
+  // keep the 23 mantissa bits and force the exponent field to 126.
+  const std::uint32_t bumped_multiplier_as_bits =
+      (fp_as_bits & UINT32_C(0x007fffff)) | UINT32_C(0x3f000000);
+  float bumped_multiplier = 0.0f;
+  std::memcpy(&bumped_multiplier, &bumped_multiplier_as_bits, sizeof(bumped_multiplier));
+  const int shift = 126 - static_cast<int>(current_exponent);
+
+  // convert to fixed point number
+  const std::int64_t int_multiplier =
+      static_cast<std::int64_t>(std::round(bumped_multiplier * (1ll << 31)));
+
+  *integer_multiplier = static_cast<std::int32_t>(int_multiplier);
+  *right_shift = shift;
+}
+
+// Enforces that scale and zeropoint each hold exactly one value: either a
+// rank-0 tensor or a rank-1 tensor with a single element. ORT_ENFORCE
+// reports the violation otherwise.
+void QLinearConv::ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const {
+  // Size() is the element count. Comparing GetDims().size() with 1 would only
+  // repeat the rank check and let 1-D tensors of any length through.
+  ORT_ENFORCE(scale->Shape().NumDimensions() == 0 ||
+                  (scale->Shape().NumDimensions() == 1 && scale->Shape().Size() == 1),
+              "scale must be a scalar");
+  ORT_ENFORCE(zeropoint->Shape().NumDimensions() == 0 ||
+                  (zeropoint->Shape().NumDimensions() == 1 && zeropoint->Shape().Size() == 1),
+              "zeropoint must be a scalar");
+}
+
+// Kernel definition for QLinearConv: kMSDomain, version 1, CPU execution
+// provider. Type parameters T1, T2 and T3 are each constrained to uint8
+// tensors.
+ONNX_OPERATOR_KERNEL_EX(
+    QLinearConv,
+	kMSDomain,
+    1,
+	kCpuExecutionProvider,
+    KernelDefBuilder()
+	.TypeConstraint("T1", DataTypeImpl::GetTensorType<uint8_t>())
+	.TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>())
+	.TypeConstraint("T3", DataTypeImpl::GetTensorType<uint8_t>()),
+    QLinearConv);
+}  // namespace contrib
+}  // namespace onnxruntime
--- a/onnxruntime/core/providers/cpu/nn/qlinearconv.h
+++ b/onnxruntime/core/providers/cpu/nn/qlinearconv.h
@ -0,0 +1,57 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/cpu/nn/conv_base.h"
+#include "core/util/gemmlowp_common_wrapper.h"
+
+namespace onnxruntime {
+namespace contrib {
+// CPU kernel class for the QLinearConv operator. It is both an OpKernel and
+// a ConvBase; each base is constructed from the same OpKernelInfo.
+class QLinearConv : public OpKernel, public ConvBase {
+ public:
+  explicit QLinearConv(const OpKernelInfo& info) : OpKernel(info), ConvBase(info) {}
+
+  // Kernel entry point; see the definition for the inputs it reads.
+  Status Compute(OpKernelContext* context) const override;
+
+  // Enforces the shape requirements on one scale / zero point tensor pair.
+  void ScaleAndZeropointPairValidationHelper(const Tensor* scale, const Tensor* zeropoint) const;
+
+  // Decomposes a float multiplier into an integer multiplier and a right
+  // shift, returned through the two out-pointers.
+  void QuantizeMultiplier(float fp_multiplier, std::int32_t* integer_multiplier, int* right_shift) const;
+};
+
+// Read-only column vector view over int32 values; used for the bias vector.
+using ColVectorMap = gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Col>;
+
+// Builds the three-stage gemmlowp output pipeline used when a bias is
+// present: bias addition, fixed-point quantize-down, saturating cast to
+// uint8.
+//
+//   bias            - first of `rows` int32 bias values (viewed, not copied).
+//   rows            - length of the bias vector.
+//   result_offset   - stored as result_offset_after_shift.
+//   result_mult_int - stored as result_fixedpoint_multiplier.
+//   result_shift    - stored as result_shift.
+inline std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
+                  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
+                  gemmlowp::OutputStageSaturatingCastToUint8>
+MakeOutputPipelineWithBias(const int32_t* bias,
+                           int rows,
+                           std::int32_t result_offset,
+                           std::int32_t result_mult_int,
+                           std::int32_t result_shift) {
+  gemmlowp::OutputStageBiasAddition<ColVectorMap> add_bias;
+  add_bias.bias_vector = ColVectorMap(bias, rows);
+
+  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint requantize;
+  requantize.result_fixedpoint_multiplier = result_mult_int;
+  requantize.result_shift = result_shift;
+  requantize.result_offset_after_shift = result_offset;
+
+  gemmlowp::OutputStageSaturatingCastToUint8 clamp_to_uint8;
+  return std::make_tuple(add_bias, requantize, clamp_to_uint8);
+}
+
+// Builds the two-stage gemmlowp output pipeline used when no bias is
+// present: fixed-point quantize-down followed by a saturating cast to uint8.
+//
+//   result_offset   - stored as result_offset_after_shift.
+//   result_mult_int - stored as result_fixedpoint_multiplier.
+//   result_shift    - stored as result_shift.
+inline std::tuple<gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint,
+                  gemmlowp::OutputStageSaturatingCastToUint8>
+MakeOutputPipelineWithOutBias(std::int32_t result_offset,
+                              std::int32_t result_mult_int,
+                              std::int32_t result_shift) {
+  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint requantize;
+  requantize.result_fixedpoint_multiplier = result_mult_int;
+  requantize.result_shift = result_shift;
+  requantize.result_offset_after_shift = result_offset;
+
+  gemmlowp::OutputStageSaturatingCastToUint8 clamp_to_uint8;
+  return std::make_tuple(requantize, clamp_to_uint8);
+}
+}
+}  // namespace onnxruntime
--- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
@ -0,0 +1,180 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+using namespace std;
+namespace onnxruntime {
+namespace test {
+
+namespace {
+
+// Writes the smallest and largest element of vec to *min and *max.
+// Both are set to 0 when vec is empty, since there is no element to read.
+void FindMinMax(const vector<float>& vec, float* min,
+                float* max) {
+  *min = *max = 0;
+  if (vec.empty()) {
+    return;
+  }
+  // One pass over the data yields both extremes.
+  const auto extremes = std::minmax_element(vec.begin(), vec.end());
+  *min = *extremes.first;
+  *max = *extremes.second;
+}
+
+// Derives a scale and zero point that map [min, max], widened to contain 0,
+// onto the quantization range 0-255.
+//
+//   min, max   - observed value range.
+//   scale      - receives (max - min) / 255, or 1 when the widened range is
+//                empty (min == max == 0).
+//   zero_point - receives the quantized value of 0, rounded and clamped to
+//                [0, 255].
+void FindScaleAndZeroPoint(float min, float max, float* scale, uint8_t* zero_point) {
+  min = std::min(min, 0.f);
+  max = std::max(max, 0.f);
+  float qmin = 0;
+  float qmax = 255;
+
+  // An empty range would give scale 0 and a division by zero below; any
+  // non-zero scale represents the only value (0) exactly, so use 1.
+  *scale = (max == min) ? 1.f : (max - min) / (qmax - qmin);
+  const auto initial_zero_point = qmin - min / *scale;
+  *zero_point = static_cast<uint8_t>(std::round(std::max(0.f, std::min(255.f, initial_zero_point))));
+}
+
+// Quantizes every element of input as round(value / scale) + zero_point,
+// clamped to [0, 255], and stores it at the same index of *input_quantized.
+// The output vector is grown when it is shorter than the input; elements
+// past input.size() are left untouched.
+void Quantize(float scale, uint8_t zero_point,
+              const std::vector<float>& input, std::vector<uint8_t>* input_quantized) {
+  // Guard against writing past the end of an undersized output vector.
+  if (input_quantized->size() < input.size()) {
+    input_quantized->resize(input.size());
+  }
+  for (size_t i = 0; i < input.size(); i++) {
+    const float clamped_val = std::max(0.f, std::min(255.f, std::round(input[i] / scale) + zero_point));
+    (*input_quantized)[i] = static_cast<uint8_t>(clamped_val);
+  }
+}
+
+// 2-D case: a 1x1x7x7 float input and a single 1x1x1x1 filter weight are
+// quantized to uint8 with scale / zero point derived from their value
+// ranges. The float expected values are quantized the same way and compared
+// with the uint8 output of QLinearConv. No attributes and no bias are set.
+TEST(ConvTest, QLinearConv2DTest) {
+  OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);
+
+  vector<float> X = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f, -0.39421093463897705f,
+                     0.2626858949661255f, 0.13414543867111206f, -0.27184486389160156f, -0.43028733134269714f,
+                     -0.26825493574142456f, 0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f,
+                     -0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f, 0.4043201804161072f,
+                     0.07795023918151855f, 0.326981782913208f, 0.13114392757415771f, -0.4416425824165344f,
+                     0.12446999549865723f, 0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f,
+                     0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f, 0.3877097964286804f,
+                     0.20812749862670898f, -0.34297940135002136f, -0.029246658086776733f, -0.20483523607254028f,
+                     -0.19244328141212463f, -0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f,
+                     0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f, -0.3349453806877136f,
+                     -0.34091079235076904f, 0.006497859954833984f, 0.4537564516067505f, 0.08006560802459717f,
+                     -0.14788749814033508f, 0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f,
+                     0.42619407176971436f};
+  vector<int64_t> X_shape = {1, 1, 7, 7};
+
+  vector<float> W = {-0.4406261742115021f};
+  vector<int64_t> W_shape = {1, 1, 1, 1};
+
+  auto expected_vals = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f, 0.17369966208934784f,
+                        -0.11574628204107285f, -0.05910799279808998f, 0.1197819635272026f, 0.18959586322307587f,
+                        0.1182001456618309f, -0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f,
+                        0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f, -0.17815405130386353f,
+                        -0.034346915781497955f, -0.14407673478126526f, -0.05778544768691063f, 0.19459928572177887f,
+                        -0.05484473705291748f, -0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f,
+                        -0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f, -0.17083507776260376f,
+                        -0.09170642495155334f, 0.1511256992816925f, 0.012886842712759972f, 0.09025576710700989f,
+                        0.08479554951190948f, 0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f,
+                        -0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f, 0.1475857049226761f,
+                        0.15021422505378723f, -0.0028631272725760937f, -0.19993697106838226f, -0.03527900204062462f,
+                        0.06516310572624207f, -0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f,
+                        -0.18779225647449493f};
+  vector<int64_t> Y_shape = {1, 1, 7, 7};
+
+  // Calculate quantization params and quantize the inputs and expected output
+  float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
+  FindMinMax(X, &lhs_min, &lhs_max);
+  FindMinMax(W, &rhs_min, &rhs_max);
+  FindMinMax(expected_vals, &result_min, &result_max);
+
+  float lhs_scale, rhs_scale, result_scale;
+  uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
+  FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
+  FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
+  FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
+
+  vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()), result_quantized(expected_vals.size());
+  Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
+  Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
+  Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
+
+  // Inputs are added in operator order: x, x_scale, x_zero_point, w, w_scale,
+  // w_zero_point, y_scale, y_zero_point. Scales and zero points use an empty
+  // dims list.
+  test.AddInput<uint8_t>("x", X_shape, x_quantized);
+  test.AddInput<float>("x_scale", {}, {lhs_scale});
+  test.AddInput<uint8_t>("x_zero_point", {}, {lhs_zero_point});
+
+  test.AddInput<uint8_t>("w", W_shape, w_quantized);
+  test.AddInput<float>("w_scale", {}, {rhs_scale});
+  test.AddInput<uint8_t>("w_zero_point", {}, {rhs_zero_point});
+
+  test.AddInput<float>("y_scale", {}, {result_scale});
+  test.AddInput<uint8_t>("y_zero_point", {}, {result_zero_point});
+
+  test.AddOutput<uint8_t>("y", Y_shape, result_quantized);
+
+  test.Run();
+}
+
+// 3-D case: a 1x1x4x4x4 float input and a single 1x1x1x1x1 filter weight,
+// run with pads of 2 at both ends of every spatial axis and a stride of 2,
+// giving a 1x1x4x4x4 output. Input, weight and expected values are quantized
+// to uint8 with scale / zero point derived from their value ranges. No bias
+// is set.
+TEST(ConvTest, QLinearConv3DTest) {
+  OpTester test("QLinearConv", 1, onnxruntime::kMSDomain);
+
+  vector<float> X = {0.010772407054901123f, -0.43806642293930054f, 0.455391526222229f, -0.28657248616218567f,
+                     0.45676887035369873f, -0.0320507287979126f, 0.4229400157928467f, -0.18730869889259338f,
+                     -0.45851585268974304f, 0.042054951190948486f, -0.13332295417785645f, -0.25374430418014526f,
+                     -0.23845627903938293f, 0.12214112281799316f, -0.1778157651424408f, 0.1891845464706421f,
+                     0.37962496280670166f, -0.033982306718826294f, 0.12737131118774414f, -0.040284961462020874f,
+                     0.46427029371261597f, -0.22687292098999023f, 0.17398333549499512f, -0.3014046251773834f,
+                     -0.4043419063091278f, -0.33206477761268616f, 0.04655301570892334f, -0.4947906732559204f,
+                     0.0755157470703125f, 0.1173025369644165f, 0.47043120861053467f, 0.4824737310409546f,
+                     -0.37734976410865784f, -0.056491583585739136f, -0.10790631175041199f, 0.043476223945617676f,
+                     0.24469023942947388f, -0.4100031852722168f, 0.0616222620010376f, 0.2296960949897766f,
+                     0.27883386611938477f, 0.08150351047515869f, 0.2453773021697998f, 0.08250969648361206f,
+                     -0.1471814215183258f, -0.43011274933815f, 0.027180075645446777f, 0.3605625033378601f,
+                     0.24954384565353394f, -0.22505927085876465f, -0.36272895336151123f, -0.47674262523651123f,
+                     0.11275297403335571f, 0.49773406982421875f, 0.2686365246772766f, 0.025525271892547607f,
+                     -0.3037869930267334f, 0.41126757860183716f, 0.36149072647094727f, 0.00883406400680542f,
+                     -0.07959523797035217f, 0.3601323366165161f, 0.17322391271591187f, -0.012007325887680054f};
+  vector<int64_t> X_shape = {1, 1, 4, 4, 4};
+  vector<float> W = {0.32824617624282837f};
+  vector<int64_t> W_shape = {1, 1, 1, 1, 1};
+  vector<int64_t> Y_shape = {1, 1, 4, 4, 4};
+  auto expected_vals = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0035360013134777546f, 0.14948052167892456f, 0.0f,
+                        0.0f, -0.15050607919692993f, -0.043762750923633575f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -0.12386361509561539f, -0.03541983291506767f, 0.0f,
+                        0.0f, 0.09152615070343018f, 0.08054415881633759f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
+
+  // Six pad values (two per spatial axis) and one stride per spatial axis.
+  vector<int64_t> pads = {2, 2, 2, 2, 2, 2};
+  vector<int64_t>strides = {2, 2, 2};
+
+  // Calculate quantization params and quantize the inputs and expected output
+  float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
+  FindMinMax(X, &lhs_min, &lhs_max);
+  FindMinMax(W, &rhs_min, &rhs_max);
+  FindMinMax(expected_vals, &result_min, &result_max);
+
+  float lhs_scale, rhs_scale, result_scale;
+  uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
+  FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
+  FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
+  FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
+
+  vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()), result_quantized(expected_vals.size());
+  Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
+  Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
+  Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
+
+  test.AddAttribute("pads", pads);
+  test.AddAttribute("strides", strides);
+
+  // Inputs are added in operator order: x, x_scale, x_zero_point, w, w_scale,
+  // w_zero_point, y_scale, y_zero_point. Scales and zero points use an empty
+  // dims list.
+  test.AddInput<uint8_t>("x", X_shape, x_quantized);
+  test.AddInput<float>("x_scale", {}, {lhs_scale});
+  test.AddInput<uint8_t>("x_zero_point", {}, {lhs_zero_point});
+
+  test.AddInput<uint8_t>("w", W_shape, w_quantized);
+  test.AddInput<float>("w_scale", {}, {rhs_scale});
+  test.AddInput<uint8_t>("w_zero_point", {}, {rhs_zero_point});
+
+  test.AddInput<float>("y_scale", {}, {result_scale});
+  test.AddInput<uint8_t>("y_zero_point", {}, {result_zero_point});
+
+  test.AddOutput<uint8_t>("y", Y_shape, result_quantized);
+
+  test.Run();      
+}
+}  // namespace
+}  // namespace test
+}  // namespace onnxruntime