mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
Add int32_t support for DeQuantizeLinear (#5994)
* Add int32_t support for DeQuantizeLinear * DequantizeLinear with int32 should have no zero point
This commit is contained in:
parent
c727a28735
commit
4fdfbfd4b4
3 changed files with 48 additions and 4 deletions
|
|
@ -289,6 +289,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ThresholdedRelu);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int32_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, uint8_t, QuantizeLinear);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t, QuantizeLinear);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul);
|
||||
|
|
@ -446,6 +447,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Ca
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Clip);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, uint8_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Expand);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, Expand);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t, Expand);
|
||||
|
|
@ -1066,6 +1068,8 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
|
|||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t,
|
||||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int32_t,
|
||||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, uint8_t,
|
||||
QuantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t,
|
||||
|
|
@ -1362,6 +1366,8 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
|
|||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t,
|
||||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t,
|
||||
DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, uint8_t,
|
||||
QuantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t,
|
||||
|
|
|
|||
|
|
@ -59,11 +59,13 @@ static void PrepareForQDQ(const TensorShape& input_shape,
|
|||
|
||||
REGISTER_DEQUANTIZELINEAR(int8_t)
|
||||
REGISTER_DEQUANTIZELINEAR(uint8_t)
|
||||
REGISTER_DEQUANTIZELINEAR(int32_t)
|
||||
REGISTER_DEQUANTIZELINEAR_VERSIONED(int8_t)
|
||||
REGISTER_DEQUANTIZELINEAR_VERSIONED(uint8_t)
|
||||
REGISTER_DEQUANTIZELINEAR_VERSIONED(int32_t)
|
||||
|
||||
template <typename T>
|
||||
// formula is Y = (X - ZeroPoint) * Scale
|
||||
template <typename T>
|
||||
Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
|
||||
auto& x = *ctx->Input<Tensor>(0);
|
||||
auto& x_scale = *ctx->Input<Tensor>(1);
|
||||
|
|
@ -78,11 +80,19 @@ Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
|
|||
|
||||
PrepareForQDQ(x.Shape(), x_scale, x_zero_point, axis_, N, broadcast_dim, block_size);
|
||||
|
||||
const T* zero_point = x_zero_point ? x_zero_point->template Data<T>() : nullptr;
|
||||
const float* scale = x_scale.template Data<float>();
|
||||
const T* input = x.template Data<T>();
|
||||
float* output = y.template MutableData<float>();
|
||||
|
||||
const T* zero_point = x_zero_point ? x_zero_point->template Data<T>() : nullptr;
|
||||
if (std::is_same<T, int32_t>::value) {
|
||||
ORT_ENFORCE(zero_point == nullptr ||
|
||||
std::all_of(zero_point,
|
||||
zero_point + x_zero_point->Shape().Size(),
|
||||
[](int32_t zp) { return zp == 0; }),
|
||||
"DequantizeLinear with type int32 should have no zero point or all zero points should be 0");
|
||||
}
|
||||
|
||||
for (size_t n = 0; n < static_cast<size_t>(N); n++) {
|
||||
for (size_t bd = 0; bd < static_cast<size_t>(broadcast_dim); bd++) {
|
||||
auto zp = zero_point ? static_cast<int32_t>(zero_point[bd]) : 0;
|
||||
|
|
@ -123,8 +133,8 @@ REGISTER_QUANTIZELINEAR(uint8_t)
|
|||
REGISTER_QUANTIZELINEAR_VERSIONED(int8_t)
|
||||
REGISTER_QUANTIZELINEAR_VERSIONED(uint8_t)
|
||||
|
||||
template <typename T>
|
||||
// formula is Y = X / Scale + ZeroPoint
|
||||
template <typename T>
|
||||
Status QuantizeLinear<T>::Compute(OpKernelContext* ctx) const {
|
||||
auto& x = *ctx->Input<Tensor>(0);
|
||||
auto& y_scale = *ctx->Input<Tensor>(1);
|
||||
|
|
|
|||
|
|
@ -28,6 +28,16 @@ TEST(DequantizeLinearOpTest, Int8) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
// scalar scale with int32 input; no zero point input is added in this test
// (each expected y[i] equals x[i] * x_scale)
|
||||
TEST(DequantizeLinearOpTest, Int32) {
|
||||
OpTester test("DequantizeLinear", 10);
|
||||
std::vector<int64_t> dims{4};
|
||||
test.AddInput<int32_t>("x", dims, {-30, -3, 100, 127});
|
||||
// empty dims: the scale is a single scalar value applied to every element
test.AddInput<float>("x_scale", {}, {2.0f});
|
||||
// expected values are the inputs multiplied by 2.0
test.AddOutput<float>("y", dims, {-60.f, -6.f, 200.f, 254.f});
|
||||
test.Run();
|
||||
}
|
||||
|
||||
// 2d inputs
|
||||
TEST(DequantizeLinearOpTest, 2D) {
|
||||
OpTester test("DequantizeLinear", 10);
|
||||
|
|
@ -134,7 +144,7 @@ TEST(DequantizeLinearOpTest, Per_Channel_Axis_0) {
|
|||
}
|
||||
|
||||
// 1d zero & scale with int8 broadcast axis 1
|
||||
TEST(DequantizeLinearOpTest, Per_Channel_Axis_1) {
|
||||
TEST(DequantizeLinearOpTest, Per_Channel_Axis_1_int8) {
|
||||
OpTester test("DequantizeLinear", 13);
|
||||
std::vector<int64_t> dims{3, 4};
|
||||
test.AddInput<int8_t>("X", dims,
|
||||
|
|
@ -151,6 +161,24 @@ TEST(DequantizeLinearOpTest, Per_Channel_Axis_1) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
// 1d zero & scale with int32 broadcast axis 1
// (one scale / zero point per column of the 3x4 input; expected Y[r][c] = X[r][c] * scale[c])
|
||||
TEST(DequantizeLinearOpTest, Per_Channel_Axis_1_int32) {
|
||||
OpTester test("DequantizeLinear", 13);
|
||||
std::vector<int64_t> dims{3, 4};
|
||||
test.AddInput<int32_t>("X", dims,
|
||||
{0, 1, 2, 3,
|
||||
0, 2, 4, 6,
|
||||
0, 10, 20, 30});
|
||||
test.AddAttribute<int64_t>("axis", 1);
|
||||
// four scales, matching the size of axis 1
test.AddInput<float>("scale", {4}, {1, 2, 4, 8});
|
||||
// all zero points are 0: the int32 kernel enforces "no zero point or all zero points should be 0"
test.AddInput<int32_t>("zero_point", {4}, {0, 0, 0, 0});
|
||||
test.AddOutput<float>("Y", dims,
|
||||
{0, 2, 8, 24,
|
||||
0, 4, 16, 48,
|
||||
0, 20, 80, 240});
|
||||
test.Run();
|
||||
}
|
||||
|
||||
// 1d zero & scale with uint8 broadcast axis -2 (-2 resolves to axis 0)
|
||||
TEST(DequantizeLinearOpTest, Per_Channel_Neg_2) {
|
||||
OpTester test("DequantizeLinear", 13);
|
||||
|
|
|
|||
Loading…
Reference in a new issue