Merge u8u8/u8s8 QLinearConv implementations (#5662)

Combine the u8u8/u8s8 implementations for x86/x64 builds and add special case handling for 1D convolutions.
2026-07-16 18:31:27 +00:00 · 2020-11-02 21:38:39 -08:00 · 2020-11-02 21:38:39 -08:00 · 182d9c48e4
commit 182d9c48e4
parent c875fe0919
3 changed files with 548 additions and 501 deletions
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@ -4,7 +4,6 @@
 #include "core/providers/cpu/cpu_execution_provider.h"
 #include "core/framework/op_kernel.h"
 #include "core/framework/kernel_registry.h"
-#include "core/mlas/inc/mlas.h"

 #ifndef DISABLE_CONTRIB_OPS
 #include "contrib_ops/cpu/cpu_contrib_kernels.h"
@ -284,8 +283,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, MatMulInteger);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QLinearConv);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QLinearConv);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, Slice);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11, Dropout);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression);
@ -989,12 +987,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
                                                                MatMulInteger)>,
    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
-                                                                QLinearConv)>,
-#if defined(MLAS_TARGET_AMD64_IX86)
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
-                                                                QLinearConv)>,
-#endif
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv)>,
    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10,
                                                                    Slice)>,
    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11,
--- a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc
+++ b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc
--- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
@ -401,6 +401,7 @@ class QLinearConvOpTester {
    const int64_t kernel_size = std::accumulate(
        kernel_shape, kernel_shape + kernel_rank, 1LL, std::multiplies<int64_t>());
    const int32_t X_zero_point = X_.zero_point_;
+    const int32_t W_zero_point = W_.zero_point_;

    const T1* Xdata = X_.data_.data();
    T1* Ydata = Y_data.data();
@ -434,7 +435,7 @@ class QLinearConvOpTester {
                  input_offset *= input_shape[axis];
                  input_offset += input_dim;
                }
-                int32_t w_value = static_cast<int32_t>(*weight_data++);
+                int32_t w_value = static_cast<int32_t>(*weight_data++) - W_zero_point;
                if (!is_padding) {
                  int32_t x_value = static_cast<int32_t>(input_image[input_offset]) - X_zero_point;
                  sum += x_value * w_value;
@ -507,7 +508,11 @@ class QLinearConvOpTester {
  }

  void GenerateRandomWeights(const std::vector<int64_t>& shape, float scale, T2 zero_point) {
-    GenerateRandom(W_, shape, scale, zero_point, -63, 63);
+    if (std::is_signed<T2>::value) {
+      GenerateRandom(W_, shape, scale, zero_point, -63, 63);
+    } else {
+      GenerateRandom(W_, shape, scale, zero_point, 0, 255);
+    }
  }

  void SetWeightScales(const std::vector<float>& scales) {
@ -600,6 +605,15 @@ TEST(QLinearConvTest, Conv2D_U8S8_Pointwise) {
  test.Run();
 }

+TEST(QLinearConvTest, Conv2D_U8U8_Pointwise) {
+  QLinearConvOpTester<uint8_t, uint8_t> test;
+  test.GenerateRandomInput({3, 24, 19, 19}, .05f, 4);
+  test.GenerateRandomWeights({32, 24, 1, 1}, .105f, 126);
+  test.GenerateRandomBias();
+  test.SetOutputScaleAndZeroPoint(.75f, 114);
+  test.Run();
+}
+
 TEST(QLinearConvTest, Conv3D_U8S8_Pointwise) {
  QLinearConvOpTester<uint8_t, int8_t> test;
  test.GenerateRandomInput({2, 2, 15, 11, 6}, .05f, 4);
@ -708,7 +722,7 @@ TEST(QLinearConvTest, Conv2D_U8S8_Groups_PerChannel) {
  test.Run();
 }

-TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
+TEST(QLinearConvTest, Conv2D_U8S8_Depthwise) {
  QLinearConvOpTester<uint8_t, int8_t> test;
  test.GenerateRandomInput({1, 24, 25, 25}, .03f, 12);
  test.GenerateRandomWeights({24, 1, 5, 5}, .10f, 0);
@ -719,12 +733,22 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
  test.Run();
 }

-TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
+TEST(QLinearConvTest, Conv2D_U8U8_Depthwise) {
+  QLinearConvOpTester<uint8_t, uint8_t> test;
+  test.GenerateRandomInput({1, 30, 25, 25}, .03f, 12);
+  test.GenerateRandomWeights({30, 1, 3, 3}, .10f, 167);
+  test.GenerateRandomBias();
+  test.SetPads({2, 0, 2, 0});
+  test.SetGroups(30);
+  test.SetOutputScaleAndZeroPoint(.76f, 88);
+  test.Run();
+}
+
+TEST(QLinearConvTest, Conv2D_U8S8_DepthwisePointwise) {
  // Tests the combination of using the depthwise convolution path along with the
  // pointwise convolution optimization that avoids im2col.
  QLinearConvOpTester<uint8_t, int8_t> test;
  test.GenerateRandomInput({1, 27, 18, 18}, .03f, 12);
-  test.GenerateRandomInput({1, 27, 4, 4}, .03f, 12);
  test.GenerateRandomWeights({27, 1, 1, 1}, .05f, 0);
  test.GenerateRandomBias();
  test.SetGroups(27);
@ -732,6 +756,16 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
  test.Run();
 }

+TEST(QLinearConvTest, Conv3D_U8S8_Depthwise) {
+  QLinearConvOpTester<uint8_t, int8_t> test;
+  test.GenerateRandomInput({1, 16, 15, 11, 13}, .02f, 135);
+  test.GenerateRandomWeights({16, 1, 3, 3, 3}, .09f, 0);
+  test.GenerateRandomBias();
+  test.SetGroups(16);
+  test.SetOutputScaleAndZeroPoint(.85f, 112);
+  test.Run();
+}
+
 #endif

 }  // namespace