mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
Merge u8u8/u8s8 QLinearConv implementations (#5662)
Combine the u8u8/u8s8 implementations for x86/x64 builds and add special case handling for 1D convolutions.
This commit is contained in:
parent
c875fe0919
commit
182d9c48e4
3 changed files with 548 additions and 501 deletions
|
|
@ -4,7 +4,6 @@
|
|||
#include "core/providers/cpu/cpu_execution_provider.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/framework/kernel_registry.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
#include "contrib_ops/cpu/cpu_contrib_kernels.h"
|
||||
|
|
@ -284,8 +283,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, MatMulInteger);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QLinearConv);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QLinearConv);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, Slice);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11, Dropout);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression);
|
||||
|
|
@ -989,12 +987,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
|
||||
MatMulInteger)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
|
||||
QLinearConv)>,
|
||||
#if defined(MLAS_TARGET_AMD64_IX86)
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
|
||||
QLinearConv)>,
|
||||
#endif
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearConv)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10,
|
||||
Slice)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -401,6 +401,7 @@ class QLinearConvOpTester {
|
|||
const int64_t kernel_size = std::accumulate(
|
||||
kernel_shape, kernel_shape + kernel_rank, 1LL, std::multiplies<int64_t>());
|
||||
const int32_t X_zero_point = X_.zero_point_;
|
||||
const int32_t W_zero_point = W_.zero_point_;
|
||||
|
||||
const T1* Xdata = X_.data_.data();
|
||||
T1* Ydata = Y_data.data();
|
||||
|
|
@ -434,7 +435,7 @@ class QLinearConvOpTester {
|
|||
input_offset *= input_shape[axis];
|
||||
input_offset += input_dim;
|
||||
}
|
||||
int32_t w_value = static_cast<int32_t>(*weight_data++);
|
||||
int32_t w_value = static_cast<int32_t>(*weight_data++) - W_zero_point;
|
||||
if (!is_padding) {
|
||||
int32_t x_value = static_cast<int32_t>(input_image[input_offset]) - X_zero_point;
|
||||
sum += x_value * w_value;
|
||||
|
|
@ -507,7 +508,11 @@ class QLinearConvOpTester {
|
|||
}
|
||||
|
||||
void GenerateRandomWeights(const std::vector<int64_t>& shape, float scale, T2 zero_point) {
|
||||
GenerateRandom(W_, shape, scale, zero_point, -63, 63);
|
||||
if (std::is_signed<T2>::value) {
|
||||
GenerateRandom(W_, shape, scale, zero_point, -63, 63);
|
||||
} else {
|
||||
GenerateRandom(W_, shape, scale, zero_point, 0, 255);
|
||||
}
|
||||
}
|
||||
|
||||
void SetWeightScales(const std::vector<float>& scales) {
|
||||
|
|
@ -600,6 +605,15 @@ TEST(QLinearConvTest, Conv2D_U8S8_Pointwise) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv2D_U8U8_Pointwise) {
|
||||
QLinearConvOpTester<uint8_t, uint8_t> test;
|
||||
test.GenerateRandomInput({3, 24, 19, 19}, .05f, 4);
|
||||
test.GenerateRandomWeights({32, 24, 1, 1}, .105f, 126);
|
||||
test.GenerateRandomBias();
|
||||
test.SetOutputScaleAndZeroPoint(.75f, 114);
|
||||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv3D_U8S8_Pointwise) {
|
||||
QLinearConvOpTester<uint8_t, int8_t> test;
|
||||
test.GenerateRandomInput({2, 2, 15, 11, 6}, .05f, 4);
|
||||
|
|
@ -708,7 +722,7 @@ TEST(QLinearConvTest, Conv2D_U8S8_Groups_PerChannel) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
|
||||
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise) {
|
||||
QLinearConvOpTester<uint8_t, int8_t> test;
|
||||
test.GenerateRandomInput({1, 24, 25, 25}, .03f, 12);
|
||||
test.GenerateRandomWeights({24, 1, 5, 5}, .10f, 0);
|
||||
|
|
@ -719,12 +733,22 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise5x5) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
|
||||
TEST(QLinearConvTest, Conv2D_U8U8_Depthwise) {
|
||||
QLinearConvOpTester<uint8_t, uint8_t> test;
|
||||
test.GenerateRandomInput({1, 30, 25, 25}, .03f, 12);
|
||||
test.GenerateRandomWeights({30, 1, 3, 3}, .10f, 167);
|
||||
test.GenerateRandomBias();
|
||||
test.SetPads({2, 0, 2, 0});
|
||||
test.SetGroups(30);
|
||||
test.SetOutputScaleAndZeroPoint(.76f, 88);
|
||||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv2D_U8S8_DepthwisePointwise) {
|
||||
// Tests the combination of using the depthwise convolution path along with the
|
||||
// pointwise convolution optimization that avoids im2col.
|
||||
QLinearConvOpTester<uint8_t, int8_t> test;
|
||||
test.GenerateRandomInput({1, 27, 18, 18}, .03f, 12);
|
||||
test.GenerateRandomInput({1, 27, 4, 4}, .03f, 12);
|
||||
test.GenerateRandomWeights({27, 1, 1, 1}, .05f, 0);
|
||||
test.GenerateRandomBias();
|
||||
test.SetGroups(27);
|
||||
|
|
@ -732,6 +756,16 @@ TEST(QLinearConvTest, Conv2D_U8S8_Depthwise1x1) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(QLinearConvTest, Conv3D_U8S8_Depthwise) {
|
||||
QLinearConvOpTester<uint8_t, int8_t> test;
|
||||
test.GenerateRandomInput({1, 16, 15, 11, 13}, .02f, 135);
|
||||
test.GenerateRandomWeights({16, 1, 3, 3, 3}, .09f, 0);
|
||||
test.GenerateRandomBias();
|
||||
test.SetGroups(16);
|
||||
test.SetOutputScaleAndZeroPoint(.85f, 112);
|
||||
test.Run();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
|
|
|||
Loading…
Reference in a new issue