mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
Implement QLinearLeakyRelu (#3648)
* Implement QLinearLeakyRelu and its unit tests. * Compute the lookup table in the constructor when all quantization parameters are constant. * Fix test-case expected results that depend on the floating-point rounding mode.
This commit is contained in:
parent
5e0928a777
commit
3c4f3d01cd
5 changed files with 236 additions and 0 deletions
|
|
@ -32,6 +32,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
|
|||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu);
|
||||
|
|
@ -109,6 +111,8 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BiasGelu)>,
|
||||
|
|
|
|||
121
onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc
Normal file
121
onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "qlinear_lookup_table.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
||||
// Maps `n` bytes through a 256-entry lookup table: y[i] = table[x[i]].
//
// x     : input bytes (read-only), at least `n` elements.
// table : 256-entry translation table indexed by the input byte value.
// y     : output bytes, at least `n` elements.
// n     : number of elements to translate; 0 is a no-op.
//
// The bulk of the data is handled four elements per iteration; the remaining
// (n % 4) elements are translated one at a time.
static void QLinearLookupTableTransform(const uint8_t* x, const uint8_t table[256], uint8_t* y, size_t n) {
  const size_t unrolled_end = n - (n % 4);
  size_t pos = 0;

  while (pos < unrolled_end) {
    // Read all four inputs and finish all four lookups before the first store,
    // so the load/store order is the same for every group of four.
    const size_t index0 = x[pos + 0];
    const size_t index1 = x[pos + 1];
    const size_t index2 = x[pos + 2];
    const size_t index3 = x[pos + 3];

    const uint8_t mapped0 = table[index0];
    const uint8_t mapped1 = table[index1];
    const uint8_t mapped2 = table[index2];
    const uint8_t mapped3 = table[index3];

    y[pos + 0] = mapped0;
    y[pos + 1] = mapped1;
    y[pos + 2] = mapped2;
    y[pos + 3] = mapped3;

    pos += 4;
  }

  // Tail: fewer than four elements left.
  while (pos < n) {
    const size_t index = x[pos];
    y[pos] = table[index];
    ++pos;
  }
}
|
||||
|
||||
template <typename T>
|
||||
static void BuildQLinearLeakyReluLookupTable(uint8_t table[256],
|
||||
const Tensor* tensor_x_scale,
|
||||
const Tensor* tensor_x_zero_point,
|
||||
const Tensor* tensor_y_scale,
|
||||
const Tensor* tensor_y_zero_point,
|
||||
float alpha) {
|
||||
ORT_ENFORCE(IsScalarOr1ElementVector(tensor_x_scale),
|
||||
"QLinearLeakyRelu : input X_scale must be a scalar or 1D tensor of size 1");
|
||||
ORT_ENFORCE(tensor_x_zero_point == nullptr || IsScalarOr1ElementVector(tensor_x_zero_point),
|
||||
"QLinearLeakyRelu : input X_zero_point must be a scalar or 1D tensor of size 1");
|
||||
ORT_ENFORCE(IsScalarOr1ElementVector(tensor_y_scale),
|
||||
"QLinearLeakyRelu : input Y_scale must be a scalar or 1D tensor of size 1");
|
||||
ORT_ENFORCE(tensor_y_zero_point == nullptr || IsScalarOr1ElementVector(tensor_y_zero_point),
|
||||
"QLinearLeakyRelu : input Y_zero_point must be a scalar or 1D tensor of size 1");
|
||||
|
||||
const float X_scale = *(tensor_x_scale->Data<float>());
|
||||
const T X_zero_point = (tensor_x_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_x_zero_point->template Data<T>());
|
||||
const float Y_scale = *(tensor_y_scale->Data<float>());
|
||||
const T Y_zero_point = (tensor_y_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_y_zero_point->template Data<T>());
|
||||
|
||||
float dequantized_vector[256];
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
T x = static_cast<T>(i);
|
||||
float x_dequantized = X_scale * (static_cast<int>(x) - static_cast<int>(X_zero_point));
|
||||
dequantized_vector[i] = x_dequantized >= 0.0f ? x_dequantized : alpha * x_dequantized;
|
||||
}
|
||||
MlasQuantizeLinear(dequantized_vector, (T*)table, 256, Y_scale, Y_zero_point);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
QLinearLeakyRelu<T>::QLinearLeakyRelu(const OpKernelInfo& info)
|
||||
: OpKernel(info), alpha_(info.GetAttrOrDefault("alpha", 0.01f)) {
|
||||
const Tensor* tensor_x_scale = nullptr;
|
||||
const Tensor* tensor_x_zero_point = nullptr;
|
||||
const Tensor* tensor_y_scale = nullptr;
|
||||
const Tensor* tensor_y_zero_point = nullptr;
|
||||
|
||||
bool get_x_scale = info.TryGetConstantInput(1, &tensor_x_scale);
|
||||
bool get_x_zero_point = !info.node().InputDefs()[2]->Exists() || info.TryGetConstantInput(2, &tensor_x_zero_point);
|
||||
bool get_y_scale = info.TryGetConstantInput(3, &tensor_y_scale);
|
||||
bool get_y_zero_point = !info.node().InputDefs()[4]->Exists() || info.TryGetConstantInput(4, &tensor_y_zero_point);
|
||||
is_fixed_parameters_ = get_x_scale && get_x_zero_point && get_y_scale && get_y_zero_point;
|
||||
|
||||
if (is_fixed_parameters_) {
|
||||
BuildQLinearLeakyReluLookupTable<T>(
|
||||
fixed_lookup_table_, tensor_x_scale, tensor_x_zero_point,
|
||||
tensor_y_scale, tensor_y_zero_point, alpha_);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Status QLinearLeakyRelu<T>::Compute(OpKernelContext* context) const {
|
||||
const auto& X = *context->Input<Tensor>(0);
|
||||
const auto& input_shape = X.Shape();
|
||||
const auto N = input_shape.Size();
|
||||
auto& Y = *context->Output(0, input_shape);
|
||||
|
||||
uint8_t table[256];
|
||||
if (!is_fixed_parameters_) {
|
||||
BuildQLinearLeakyReluLookupTable<T>(
|
||||
table, context->Input<Tensor>(1), context->Input<Tensor>(2),
|
||||
context->Input<Tensor>(3), context->Input<Tensor>(4), alpha_);
|
||||
}
|
||||
|
||||
QLinearLookupTableTransform(
|
||||
reinterpret_cast<const uint8_t*>(X.template Data<T>()),
|
||||
is_fixed_parameters_ ? fixed_lookup_table_ : table,
|
||||
reinterpret_cast<uint8_t*>(Y.template MutableData<T>()),
|
||||
static_cast<size_t>(N));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#define REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(op_name, version, data_type, KERNEL_CLASS) \
|
||||
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( \
|
||||
op_name, version, data_type, \
|
||||
KernelDefBuilder() \
|
||||
.TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()), \
|
||||
KERNEL_CLASS<data_type>);
|
||||
|
||||
REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, int8_t, QLinearLeakyRelu);
|
||||
REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, uint8_t, QLinearLeakyRelu);
|
||||
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
26
onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h
Normal file
26
onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
||||
template <typename T>
|
||||
class QLinearLeakyRelu final : public OpKernel {
|
||||
public:
|
||||
QLinearLeakyRelu(const OpKernelInfo& info);
|
||||
|
||||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
private:
|
||||
const float alpha_;
|
||||
bool is_fixed_parameters_; // Fixed Scale and Zero Point for both x and y
|
||||
uint8_t fixed_lookup_table_[256]; // when is const paramter, table value is here.
|
||||
};
|
||||
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -2078,6 +2078,37 @@ Output = Dequantize(Input) -> AveragePool on fp32 data -> Quantize(output)
|
|||
ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
|
||||
});
|
||||
|
||||
const char* QLinearLeakyReluDoc_ver1 = R"DOC(
|
||||
QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for output,
|
||||
and produces one output data (Tensor<T>) where the function `f(x) = quantize(alpha * dequantize(x)) for dequantize(x) < 0`,
|
||||
`f(x) = quantize(dequantize(x)) for dequantize(x) >= 0`, is applied to the data tensor elementwise.
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearLeakyRelu)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.SetDoc(QLinearLeakyReluDoc_ver1)
|
||||
.Attr("alpha", "Coefficient of leakage.", AttributeProto::FLOAT, 0.01f)
|
||||
.Input(0, "X", "Input tensor", "T")
|
||||
.Input(1, "X_scale",
|
||||
"Input X's scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(2, "X_zero_point",
|
||||
"Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"T", OpSchema::Optional)
|
||||
.Input(3, "Y_scale",
|
||||
"Output Y's scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(4, "Y_zero_point",
|
||||
"Output Y's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"T", OpSchema::Optional)
|
||||
.Output(0, "Y", "Output tensor", "T")
|
||||
.TypeConstraint(
|
||||
"T",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput);
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(MurmurHash3)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
|
|
|
|||
54
onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
Normal file
54
onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#include "gtest/gtest.h"
|
||||
#include "test/common/tensor_op_test_utils.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
|
||||
#include <cfenv>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
namespace {

// Switches the floating-point rounding mode to round-to-nearest for the lifetime
// of the object and restores the previous mode on destruction. The expected
// outputs below include values that sit exactly halfway between two quantized
// levels, so they are only valid under this rounding mode. Using a scope guard
// ensures the previous mode is restored even if running the test exits early
// through an exception.
class ScopedRoundToNearest {
 public:
  ScopedRoundToNearest() : saved_mode_(std::fegetround()) { std::fesetround(FE_TONEAREST); }
  ~ScopedRoundToNearest() { std::fesetround(saved_mode_); }

  ScopedRoundToNearest(const ScopedRoundToNearest&) = delete;
  ScopedRoundToNearest& operator=(const ScopedRoundToNearest&) = delete;

 private:
  const int saved_mode_;
};

}  // namespace

// int8 data with the optional X_zero_point omitted (defaults to 0). Covers
// saturation at both ends of the int8 range and halfway rounding cases.
TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_Int8) {
  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
  test.AddAttribute<float>("alpha", 0.1f);
  float X_scale = 0.25f;
  float Y_scale = 0.1f;
  int8_t Y_zero_point = -100;

  std::vector<int64_t> dims = {16};
  test.AddInput<int8_t>("X", dims, {0, 16, 17, 18, 19, 90, 91, 127, -128, -110, -108, -100, -16, -17, -18, -1});
  test.AddInput<float>("X_scale", {}, {X_scale});
  test.AddMissingOptionalInput<int8_t>();  // optional "X_zero_point" using default value here
  test.AddInput<float>("Y_scale", {}, {Y_scale});
  test.AddInput<int8_t>("Y_zero_point", {}, {Y_zero_point});
  test.AddOutput<int8_t>("Y", dims, {-100, -60, -58, -55, -52, 125, 127, 127, -128, -128, -127, -125, -104, -104, -104, -100});

  ScopedRoundToNearest rounding_guard;
  test.Run();
}

// uint8 data with explicit zero points for both input and output. Covers
// saturation at 0 and 255 and halfway rounding cases.
TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_UInt8) {
  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
  test.AddAttribute<float>("alpha", 0.1f);
  float X_scale = 0.25f;
  uint8_t X_zero_point = 128;
  float Y_scale = 0.1f;
  uint8_t Y_zero_point = 30;

  std::vector<int64_t> dims = {16};
  test.AddInput<uint8_t>("X", dims, {0, 16, 17, 18, 19, 90, 91, 127, 128, 136, 137, 138, 216, 217, 218, 255});
  test.AddInput<float>("X_scale", {}, {X_scale});
  test.AddInput<uint8_t>("X_zero_point", {}, {X_zero_point});
  test.AddInput<float>("Y_scale", {}, {Y_scale});
  test.AddInput<uint8_t>("Y_zero_point", {}, {Y_zero_point});
  test.AddOutput<uint8_t>("Y", dims, {0, 2, 2, 2, 3, 20, 21, 30, 30, 50, 52, 55, 250, 252, 255, 255});

  ScopedRoundToNearest rounding_guard;
  test.Run();
}
|
||||
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue