Merge u8u8/u8s8 QLinearConv implementations (#5662)

Combine the u8u8/u8s8 implementations for x86/x64 builds and add special case handling for 1D convolutions.
This commit is contained in:
Tracy Sharpe 2020-11-02 21:38:39 -08:00 committed by GitHub
parent c875fe0919
commit 182d9c48e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 548 additions and 501 deletions

View file

@ -4,7 +4,6 @@
#include "core/providers/cpu/cpu_execution_provider.h"
#include "core/framework/op_kernel.h"
#include "core/framework/kernel_registry.h"
#include "core/mlas/inc/mlas.h"
#ifndef DISABLE_CONTRIB_OPS
#include "contrib_ops/cpu/cpu_contrib_kernels.h"
@ -284,8 +283,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, MatMulInteger);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QLinearConv);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QLinearConv);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, Slice);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11, Dropout);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression);
@ -989,12 +987,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
MatMulInteger)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
QLinearConv)>,
#if defined(MLAS_TARGET_AMD64_IX86)
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
QLinearConv)>,
#endif
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10,
Slice)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11,

File diff suppressed because it is too large Load diff

View file

@ -401,6 +401,7 @@ class QLinearConvOpTester {
const int64_t kernel_size = std::accumulate(
kernel_shape, kernel_shape + kernel_rank, 1LL, std::multiplies<int64_t>());
const int32_t X_zero_point = X_.zero_point_;
const int32_t W_zero_point = W_.zero_point_;
const T1* Xdata = X_.data_.data();
T1* Ydata = Y_data.data();
@ -434,7 +435,7 @@ class QLinearConvOpTester {
input_offset *= input_shape[axis];
input_offset += input_dim;
}
int32_t w_value = static_cast<int32_t>(*weight_data++);
int32_t w_value = static_cast<int32_t>(*weight_data++) - W_zero_point;
if (!is_padding) {
int32_t x_value = static_cast<int32_t>(input_image[input_offset]) - X_zero_point;
sum += x_value * w_value;
@ -507,7 +508,11 @@ class QLinearConvOpTester {
}
// Populates the weight tensor W_ by delegating to GenerateRandom with the given
// shape, scale and zero point. The trailing two arguments are presumably the
// minimum and maximum generated values — confirm against GenerateRandom.
void GenerateRandomWeights(const std::vector<int64_t>& shape, float scale, T2 zero_point) {
// NOTE(review): this unconditional call repeats the signed branch below; in this
// diff rendering it looks like the pre-change line that the if/else replaces — confirm.
GenerateRandom(W_, shape, scale, zero_point, -63, 63);
// Signed weight types pass -63..63; unsigned weight types pass 0..255.
if (std::is_signed<T2>::value) {
GenerateRandom(W_, shape, scale, zero_point, -63, 63);
} else {
GenerateRandom(W_, shape, scale, zero_point, 0, 255);
}
}
void SetWeightScales(const std::vector<float>& scales) {
@ -600,6 +605,15 @@ TEST(QLinearConvTest, Conv2D_U8S8_Pointwise) {
test.Run();
}
// QLinearConv test with uint8_t for both tester type arguments (the second one is
// the weight type taken by GenerateRandomWeights) and a weight shape whose
// trailing two dimensions are 1x1.
TEST(QLinearConvTest, Conv2D_U8U8_Pointwise) {
QLinearConvOpTester<uint8_t, uint8_t> test;
// Arguments are presumably (shape, scale, zero_point), mirroring
// GenerateRandomWeights — confirm against GenerateRandomInput.
test.GenerateRandomInput({3, 24, 19, 19}, .05f, 4);
// Weights use a non-zero zero point (126) together with the unsigned weight type.
test.GenerateRandomWeights({32, 24, 1, 1}, .105f, 126);
test.GenerateRandomBias();
test.SetOutputScaleAndZeroPoint(.75f, 114);
test.Run();
}
TEST(QLinearConvTest, Conv3D_U8S8_Pointwise) {
QLinearConvOpTester<uint8_t, int8_t> test;
test.GenerateRandomInput({2, 2, 15, 11, 6}, .05f, 4);
@ -708,7 +722,7 @@ TEST(QLinearConvTest, Conv2D_U8S8_Groups_PerChannel) {
test.Run();
}
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise) {
QLinearConvOpTester<uint8_t, int8_t> test;
test.GenerateRandomInput({1, 24, 25, 25}, .03f, 12);
test.GenerateRandomWeights({24, 1, 5, 5}, .10f, 0);
@ -719,12 +733,22 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
test.Run();
}
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
TEST(QLinearConvTest, Conv2D_U8U8_Depthwise) {
// QLinearConv test with uint8_t for both tester type arguments. The group count
// (30) equals the second input dimension and the first weight dimension, and the
// second weight dimension is 1.
QLinearConvOpTester<uint8_t, uint8_t> test;
test.GenerateRandomInput({1, 30, 25, 25}, .03f, 12);
// 3x3 kernel with a non-zero weight zero point (167).
test.GenerateRandomWeights({30, 1, 3, 3}, .10f, 167);
test.GenerateRandomBias();
// Asymmetric padding values; presumably in ONNX begin/end order — confirm against SetPads.
test.SetPads({2, 0, 2, 0});
test.SetGroups(30);
test.SetOutputScaleAndZeroPoint(.76f, 88);
test.Run();
}
TEST(QLinearConvTest, Conv2D_U8S8_DepthwisePointwise) {
// Tests the combination of using the depthwise convolution path along with the
// pointwise convolution optimization that avoids im2col.
QLinearConvOpTester<uint8_t, int8_t> test;
test.GenerateRandomInput({1, 27, 18, 18}, .03f, 12);
test.GenerateRandomInput({1, 27, 4, 4}, .03f, 12);
test.GenerateRandomWeights({27, 1, 1, 1}, .05f, 0);
test.GenerateRandomBias();
test.SetGroups(27);
@ -732,6 +756,16 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
test.Run();
}
// QLinearConv test with uint8_t/int8_t tester type arguments and five-dimensional
// input and weight shapes. The group count (16) equals the second input dimension
// and the first weight dimension, and the second weight dimension is 1.
TEST(QLinearConvTest, Conv3D_U8S8_Depthwise) {
QLinearConvOpTester<uint8_t, int8_t> test;
test.GenerateRandomInput({1, 16, 15, 11, 13}, .02f, 135);
// 3x3x3 kernel; the signed weights use a zero point of 0.
test.GenerateRandomWeights({16, 1, 3, 3, 3}, .09f, 0);
test.GenerateRandomBias();
test.SetGroups(16);
test.SetOutputScaleAndZeroPoint(.85f, 112);
test.Run();
}
#endif
} // namespace