Implement QLinearLeakyRelu (#3648)

* Implement QLinearRelu and its unit test.
* Add logic to compute the lookup table in the constructor when all parameters are constant.
* Fix test-case expected results that depend on the floating-point rounding mode.
This commit is contained in:
Zhang Lei 2020-05-14 14:52:55 -07:00 committed by GitHub
parent 5e0928a777
commit 3c4f3d01cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 236 additions and 0 deletions

View file

@ -32,6 +32,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear);
// Forward declarations of the QLinearLeakyRelu kernel classes (version 1, kMSDomain),
// one per supported 8-bit element type.
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu);
@ -109,6 +111,8 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BiasGelu)>,

View file

@ -0,0 +1,121 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "qlinear_lookup_table.h"
#include "core/providers/common.h"
#include "core/mlas/inc/mlas.h"
namespace onnxruntime {
namespace contrib {
// Translates `n` bytes from `x` into `y` by looking each byte up in `table`.
//   x     - source buffer of `n` bytes; every byte is used as an index into `table`
//   table - 256-entry lookup table (one output byte per possible input byte)
//   y     - destination buffer with room for `n` bytes
//   n     - number of elements to translate; 0 is a no-op
// Elements are handled four at a time (all four lookups complete before the four
// stores), then a tail loop finishes the remaining n % 4 elements.
static void QLinearLookupTableTransform(const uint8_t* x, const uint8_t table[256], uint8_t* y, size_t n) {
  while (n >= 4) {
    const size_t index0 = x[0];
    const size_t index1 = x[1];
    const size_t index2 = x[2];
    const size_t index3 = x[3];

    const uint8_t mapped0 = table[index0];
    const uint8_t mapped1 = table[index1];
    const uint8_t mapped2 = table[index2];
    const uint8_t mapped3 = table[index3];

    y[0] = mapped0;
    y[1] = mapped1;
    y[2] = mapped2;
    y[3] = mapped3;

    x += 4;
    y += 4;
    n -= 4;
  }

  // Tail: fewer than four elements remain.
  while (n > 0) {
    const size_t index = *x;
    *y = table[index];
    ++x;
    ++y;
    --n;
  }
}
// Fills `table` with the quantized LeakyRelu result for each of the 256 possible
// 8-bit input patterns, so the operator can later be applied as a byte lookup.
//   table               - output; entry i holds the result for the input whose byte value is i
//                         (interpreted as T, i.e. uint8_t or int8_t)
//   tensor_x_scale      - input scale; must be a scalar or a 1-element 1D tensor of float
//   tensor_x_zero_point - optional input zero point of type T; nullptr means 0
//   tensor_y_scale      - output scale; must be a scalar or a 1-element 1D tensor of float
//   tensor_y_zero_point - optional output zero point of type T; nullptr means 0
//   alpha               - leakage coefficient applied to negative dequantized values
// Shape violations are reported through ORT_ENFORCE.
template <typename T>
static void BuildQLinearLeakyReluLookupTable(uint8_t table[256],
                                             const Tensor* tensor_x_scale,
                                             const Tensor* tensor_x_zero_point,
                                             const Tensor* tensor_y_scale,
                                             const Tensor* tensor_y_zero_point,
                                             float alpha) {
  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_x_scale),
              "QLinearLeakyRelu : input X_scale must be a scalar or 1D tensor of size 1");
  ORT_ENFORCE(tensor_x_zero_point == nullptr || IsScalarOr1ElementVector(tensor_x_zero_point),
              "QLinearLeakyRelu : input X_zero_point must be a scalar or 1D tensor of size 1");
  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_y_scale),
              "QLinearLeakyRelu : input Y_scale must be a scalar or 1D tensor of size 1");
  ORT_ENFORCE(tensor_y_zero_point == nullptr || IsScalarOr1ElementVector(tensor_y_zero_point),
              "QLinearLeakyRelu : input Y_zero_point must be a scalar or 1D tensor of size 1");

  const float X_scale = *(tensor_x_scale->Data<float>());
  const T X_zero_point =
      (tensor_x_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_x_zero_point->template Data<T>());
  const float Y_scale = *(tensor_y_scale->Data<float>());
  const T Y_zero_point =
      (tensor_y_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_y_zero_point->template Data<T>());

  // Dequantize every possible input value and apply LeakyRelu in float.
  float dequantized_vector[256];
  for (int i = 0; i < 256; ++i) {
    // Reinterpret the table index as a value of type T (for int8_t, indices above 127
    // correspond to the negative values).
    const T x = static_cast<T>(i);
    const float x_dequantized =
        X_scale * static_cast<float>(static_cast<int>(x) - static_cast<int>(X_zero_point));
    dequantized_vector[i] = x_dequantized >= 0.0f ? x_dequantized : alpha * x_dequantized;
  }

  // Quantize all 256 results in one call; the table bytes are written as values of type T.
  MlasQuantizeLinear(dequantized_vector, reinterpret_cast<T*>(table), 256, Y_scale, Y_zero_point);
}
// Constructs the kernel, reading the "alpha" attribute (default 0.01f).
// If X_scale, X_zero_point, Y_scale and Y_zero_point are all either constant inputs or
// (for the optional zero points) not provided, the lookup table is built once here and
// reused by every Compute() call; otherwise is_fixed_parameters_ is false and Compute()
// builds the table itself.
template <typename T>
QLinearLeakyRelu<T>::QLinearLeakyRelu(const OpKernelInfo& info)
    : OpKernel(info), alpha_(info.GetAttrOrDefault("alpha", 0.01f)) {
  const Tensor* tensor_x_scale = nullptr;
  const Tensor* tensor_x_zero_point = nullptr;
  const Tensor* tensor_y_scale = nullptr;
  const Tensor* tensor_y_zero_point = nullptr;

  // An optional input counts as omitted when its slot is empty OR when the node simply
  // has fewer input defs (trailing optional inputs may be left off), so check the index
  // against size() before dereferencing.
  const auto& input_defs = info.node().InputDefs();
  const auto is_omitted = [&input_defs](size_t index) {
    return index >= input_defs.size() || !input_defs[index]->Exists();
  };

  const bool get_x_scale = info.TryGetConstantInput(1, &tensor_x_scale);
  const bool get_x_zero_point = is_omitted(2) || info.TryGetConstantInput(2, &tensor_x_zero_point);
  const bool get_y_scale = info.TryGetConstantInput(3, &tensor_y_scale);
  const bool get_y_zero_point = is_omitted(4) || info.TryGetConstantInput(4, &tensor_y_zero_point);

  is_fixed_parameters_ = get_x_scale && get_x_zero_point && get_y_scale && get_y_zero_point;
  if (is_fixed_parameters_) {
    // Omitted zero points are passed as nullptr, which the builder treats as 0.
    BuildQLinearLeakyReluLookupTable<T>(
        fixed_lookup_table_, tensor_x_scale, tensor_x_zero_point,
        tensor_y_scale, tensor_y_zero_point, alpha_);
  }
}
// Applies quantized LeakyRelu to input 0 and writes the result to output 0, which has
// the same shape as the input. The work is a byte-wise table lookup: the table built in
// the constructor is used when the quantization parameters were fixed, otherwise a table
// is built on the stack from inputs 1-4 of this call.
template <typename T>
Status QLinearLeakyRelu<T>::Compute(OpKernelContext* context) const {
  const auto& input = *context->Input<Tensor>(0);
  const auto& shape = input.Shape();
  const auto element_count = shape.Size();
  auto& output = *context->Output(0, shape);

  uint8_t per_call_table[256];
  const uint8_t* lookup = fixed_lookup_table_;
  if (!is_fixed_parameters_) {
    // Scales / zero points are runtime inputs: rebuild the table for this invocation.
    BuildQLinearLeakyReluLookupTable<T>(
        per_call_table, context->Input<Tensor>(1), context->Input<Tensor>(2),
        context->Input<Tensor>(3), context->Input<Tensor>(4), alpha_);
    lookup = per_call_table;
  }

  // The lookup works on raw bytes regardless of whether T is int8_t or uint8_t.
  const auto* source = reinterpret_cast<const uint8_t*>(input.template Data<T>());
  auto* destination = reinterpret_cast<uint8_t*>(output.template MutableData<T>());
  QLinearLookupTableTransform(source, lookup, destination, static_cast<size_t>(element_count));

  return Status::OK();
}
// Registers KERNEL_CLASS<data_type> as the typed CPU kernel for operator `op_name` at
// `version`, constraining type parameter "T" to tensors of `data_type`.
// Note: the expansion already ends with ';', so a use followed by ';' yields an extra
// empty declaration.
#define REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(op_name, version, data_type, KERNEL_CLASS) \
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( \
op_name, version, data_type, \
KernelDefBuilder() \
.TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()), \
KERNEL_CLASS<data_type>);
// QLinearLeakyRelu version 1 is registered for both signed and unsigned 8-bit tensors.
REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, int8_t, QLinearLeakyRelu);
REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, uint8_t, QLinearLeakyRelu);
} // namespace contrib
} // namespace onnxruntime

View file

@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/common/common.h"
#include "core/framework/op_kernel.h"
namespace onnxruntime {
namespace contrib {
// Kernel for the QLinearLeakyRelu operator on 8-bit quantized tensors.
// T is the element type of the input/output tensors and of the zero points.
template <typename T>
class QLinearLeakyRelu final : public OpKernel {
 public:
  // Reads the "alpha" attribute and, when possible, precomputes the lookup table.
  explicit QLinearLeakyRelu(const OpKernelInfo& info);

  // Applies the operator to input 0 and writes output 0.
  Status Compute(OpKernelContext* context) const override;

 private:
  // Coefficient of leakage applied to negative dequantized values.
  const float alpha_;
  // True when scale and zero point are fixed for both x and y.
  bool is_fixed_parameters_{false};
  // When the parameters are constant, the precomputed table values are stored here.
  uint8_t fixed_lookup_table_[256]{};
};
} // namespace contrib
} // namespace onnxruntime

View file

@ -2078,6 +2078,37 @@ Output = Dequantize(Input) -> AveragePool on fp32 data -> Quantize(output)
ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
});
// Human-readable description of the QLinearLeakyRelu operator (version 1); the text
// below is the operator's documentation string and is emitted verbatim.
const char* QLinearLeakyReluDoc_ver1 = R"DOC(
QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for output,
and produces one output data (Tensor<T>) where the function `f(x) = quantize(alpha * dequantize(x)) for dequantize(x) < 0`,
`f(x) = quantize(dequantize(x)) for dequantize(x) >= 0`, is applied to the data tensor elementwise.
)DOC";
// Schema for QLinearLeakyRelu in kMSDomain, since version 1.
// Inputs: X, X_scale, optional X_zero_point, Y_scale, optional Y_zero_point; output: Y.
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearLeakyRelu)
.SetDomain(kMSDomain)
.SinceVersion(1)
.SetDoc(QLinearLeakyReluDoc_ver1)
// "alpha" defaults to 0.01f when the attribute is absent.
.Attr("alpha", "Coefficient of leakage.", AttributeProto::FLOAT, 0.01f)
.Input(0, "X", "Input tensor", "T")
.Input(1, "X_scale",
"Input X's scale. It's a scalar, which means a per-tensor/layer quantization.",
"tensor(float)")
// Zero points share the data type "T" with X and Y and may be omitted.
.Input(2, "X_zero_point",
"Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
"T", OpSchema::Optional)
.Input(3, "Y_scale",
"Output Y's scale. It's a scalar, which means a per-tensor/layer quantization.",
"tensor(float)")
.Input(4, "Y_zero_point",
"Output Y's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
"T", OpSchema::Optional)
.Output(0, "Y", "Output tensor", "T")
.TypeConstraint(
"T",
{"tensor(uint8)", "tensor(int8)"},
"Constrain input and output types to 8 bit tensors.")
// Type and shape inference is delegated to propagateShapeAndTypeFromFirstInput.
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput);
ONNX_CONTRIB_OPERATOR_SCHEMA(MurmurHash3)
.SetDomain(kMSDomain)
.SinceVersion(1)

View file

@ -0,0 +1,54 @@
#include "gtest/gtest.h"
#include "test/common/tensor_op_test_utils.h"
#include "test/providers/provider_test_utils.h"
#include <cfenv>
namespace onnxruntime {
namespace test {
// QLinearLeakyRelu on int8 data with alpha = 0.1, X_scale = 0.25, Y_scale = 0.1 and
// Y_zero_point = -100. X_zero_point is left out so the operator's default (0) is used.
TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_Int8) {
  const float input_scale = 0.25f;
  const float output_scale = 0.1f;
  const int8_t output_zero_point = -100;
  const std::vector<int64_t> shape = {16};

  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
  test.AddAttribute<float>("alpha", 0.1f);

  test.AddInput<int8_t>("X", shape, {0, 16, 17, 18, 19, 90, 91, 127, -128, -110, -108, -100, -16, -17, -18, -1});
  test.AddInput<float>("X_scale", {}, {input_scale});
  test.AddMissingOptionalInput<int8_t>();  // optional "X_zero_point": rely on the default value
  test.AddInput<float>("Y_scale", {}, {output_scale});
  test.AddInput<int8_t>("Y_zero_point", {}, {output_zero_point});

  test.AddOutput<int8_t>("Y", shape, {-100, -60, -58, -55, -52, 125, 127, 127, -128, -128, -127, -125, -104, -104, -104, -100});

  // Run with round-to-nearest in effect, then put the previous rounding mode back.
  const auto saved_round_mode = std::fegetround();
  std::fesetround(FE_TONEAREST);
  test.Run();
  std::fesetround(saved_round_mode);
}
// QLinearLeakyRelu on uint8 data with alpha = 0.1, X_scale = 0.25, X_zero_point = 128,
// Y_scale = 0.1 and Y_zero_point = 30; both zero points are passed explicitly.
TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_UInt8) {
  const float input_scale = 0.25f;
  const uint8_t input_zero_point = 128;
  const float output_scale = 0.1f;
  const uint8_t output_zero_point = 30;
  const std::vector<int64_t> shape = {16};

  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
  test.AddAttribute<float>("alpha", 0.1f);

  test.AddInput<uint8_t>("X", shape, {0, 16, 17, 18, 19, 90, 91, 127, 128, 136, 137, 138, 216, 217, 218, 255});
  test.AddInput<float>("X_scale", {}, {input_scale});
  test.AddInput<uint8_t>("X_zero_point", {}, {input_zero_point});
  test.AddInput<float>("Y_scale", {}, {output_scale});
  test.AddInput<uint8_t>("Y_zero_point", {}, {output_zero_point});

  test.AddOutput<uint8_t>("Y", shape, {0, 2, 2, 2, 3, 20, 21, 30, 30, 50, 52, 55, 250, 252, 255, 255});

  // Run with round-to-nearest in effect, then put the previous rounding mode back.
  const auto saved_round_mode = std::fegetround();
  std::fesetround(FE_TONEAREST);
  test.Run();
  std::fesetround(saved_round_mode);
}
}
}