From 3c4f3d01cd2a79a3fb7f93edcb6d4b58fe4993ac Mon Sep 17 00:00:00 2001
From: Zhang Lei
Date: Thu, 14 May 2020 14:52:55 -0700
Subject: [PATCH] Implement QLinearLeakyRelu (#3648)

* Implement QLinearLeakyRelu and its unit test.
* Add logic to compute the table in the constructor when all parameters are constant.
* Fix test case results that depend on the rounding mode.
---
 .../contrib_ops/cpu/cpu_contrib_kernels.cc    |   4 +
 .../contrib_ops/cpu/qlinear_lookup_table.cc   | 133 ++++++++++++++++++
 .../contrib_ops/cpu/qlinear_lookup_table.h    |  28 ++++
 .../core/graph/contrib_ops/contrib_defs.cc    |  31 +++++
 .../contrib_ops/qlinear_lookup_table_test.cc  |  59 ++++++++
 5 files changed, 255 insertions(+)
 create mode 100644 onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc
 create mode 100644 onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h
 create mode 100644 onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc

diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
index 444904aa2c..a81a675fed 100644
--- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
@@ -32,6 +32,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu);
@@ -109,6 +111,8 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu)>,
diff --git a/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc b/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc
new file mode 100644
index 0000000000..ee52faae74
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.cc
@@ -0,0 +1,133 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "qlinear_lookup_table.h"
+#include "core/providers/common.h"
+#include "core/mlas/inc/mlas.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+// Maps each of the n bytes of x through the 256-entry table and stores the results in y.
+// Four elements are handled per iteration; the second loop handles the remaining 0-3.
+static void QLinearLookupTableTransform(const uint8_t* x, const uint8_t table[256], uint8_t* y, size_t n) {
+  for (; n >= 4; n -= 4) {
+    const size_t x_value0 = x[0];
+    const size_t x_value1 = x[1];
+    const size_t x_value2 = x[2];
+    const size_t x_value3 = x[3];
+    x += 4;
+    const uint8_t table_value0 = table[x_value0];
+    const uint8_t table_value1 = table[x_value1];
+    const uint8_t table_value2 = table[x_value2];
+    const uint8_t table_value3 = table[x_value3];
+
+    y[0] = table_value0;
+    y[1] = table_value1;
+    y[2] = table_value2;
+    y[3] = table_value3;
+    y += 4;
+  }
+  for (; n != 0; --n) {
+    const size_t x_value0 = *x++;
+    const uint8_t table_value0 = table[x_value0];
+    *y++ = table_value0;
+  }
+}
+
+// Fills table with the quantized LeakyRelu result for every possible 8-bit input value.
+// Each value is dequantized with X_scale / X_zero_point, multiplied by alpha when negative,
+// and re-quantized with Y_scale / Y_zero_point through MlasQuantizeLinear.
+// A null zero point tensor means a zero point of 0. Scales and zero points must hold one element.
+template <typename T>
+static void BuildQLinearLeakyReluLookupTable(uint8_t table[256],
+                                             const Tensor* tensor_x_scale,
+                                             const Tensor* tensor_x_zero_point,
+                                             const Tensor* tensor_y_scale,
+                                             const Tensor* tensor_y_zero_point,
+                                             float alpha) {
+  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_x_scale),
+              "QLinearLeakyRelu : input X_scale must be a scalar or 1D tensor of size 1");
+  ORT_ENFORCE(tensor_x_zero_point == nullptr || IsScalarOr1ElementVector(tensor_x_zero_point),
+              "QLinearLeakyRelu : input X_zero_point must be a scalar or 1D tensor of size 1");
+  ORT_ENFORCE(IsScalarOr1ElementVector(tensor_y_scale),
+              "QLinearLeakyRelu : input Y_scale must be a scalar or 1D tensor of size 1");
+  ORT_ENFORCE(tensor_y_zero_point == nullptr || IsScalarOr1ElementVector(tensor_y_zero_point),
+              "QLinearLeakyRelu : input Y_zero_point must be a scalar or 1D tensor of size 1");
+
+  const float X_scale = *(tensor_x_scale->Data<float>());
+  const T X_zero_point = (tensor_x_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_x_zero_point->template Data<T>());
+  const float Y_scale = *(tensor_y_scale->Data<float>());
+  const T Y_zero_point = (tensor_y_zero_point == nullptr) ? static_cast<T>(0) : *(tensor_y_zero_point->template Data<T>());
+
+  float dequantized_vector[256];
+  for (int i = 0; i < 256; ++i) {
+    // i is the table index; convert it to the quantized type T to get the value it stands for.
+    T x = static_cast<T>(i);
+    float x_dequantized = X_scale * (static_cast<int>(x) - static_cast<int>(X_zero_point));
+    dequantized_vector[i] = x_dequantized >= 0.0f ? x_dequantized : alpha * x_dequantized;
+  }
+  MlasQuantizeLinear(dequantized_vector, reinterpret_cast<T*>(table), 256, Y_scale, Y_zero_point);
+}
+
+// Reads the "alpha" attribute (default 0.01). When TryGetConstantInput succeeds for every scale and
+// zero point input that is present, the lookup table is built once here instead of in Compute().
+template <typename T>
+QLinearLeakyRelu<T>::QLinearLeakyRelu(const OpKernelInfo& info)
+    : OpKernel(info), alpha_(info.GetAttrOrDefault("alpha", 0.01f)) {
+  const Tensor* tensor_x_scale = nullptr;
+  const Tensor* tensor_x_zero_point = nullptr;
+  const Tensor* tensor_y_scale = nullptr;
+  const Tensor* tensor_y_zero_point = nullptr;
+
+  bool get_x_scale = info.TryGetConstantInput(1, &tensor_x_scale);
+  bool get_x_zero_point = !info.node().InputDefs()[2]->Exists() || info.TryGetConstantInput(2, &tensor_x_zero_point);
+  bool get_y_scale = info.TryGetConstantInput(3, &tensor_y_scale);
+  bool get_y_zero_point = !info.node().InputDefs()[4]->Exists() || info.TryGetConstantInput(4, &tensor_y_zero_point);
+  is_fixed_parameters_ = get_x_scale && get_x_zero_point && get_y_scale && get_y_zero_point;
+
+  if (is_fixed_parameters_) {
+    BuildQLinearLeakyReluLookupTable<T>(
+        fixed_lookup_table_, tensor_x_scale, tensor_x_zero_point,
+        tensor_y_scale, tensor_y_zero_point, alpha_);
+  }
+}
+
+// Applies the lookup table to every element of input 0 and writes the result to output 0, which
+// gets the same shape. The table is rebuilt on each call unless it was fixed at construction.
+template <typename T>
+Status QLinearLeakyRelu<T>::Compute(OpKernelContext* context) const {
+  const auto& X = *context->Input<Tensor>(0);
+  const auto& input_shape = X.Shape();
+  const auto N = input_shape.Size();
+  auto& Y = *context->Output(0, input_shape);
+
+  uint8_t table[256];
+  if (!is_fixed_parameters_) {
+    BuildQLinearLeakyReluLookupTable<T>(
+        table, context->Input<Tensor>(1), context->Input<Tensor>(2),
+        context->Input<Tensor>(3), context->Input<Tensor>(4), alpha_);
+  }
+
+  QLinearLookupTableTransform(
+      reinterpret_cast<const uint8_t*>(X.template Data<T>()),
+      is_fixed_parameters_ ? fixed_lookup_table_ : table,
+      reinterpret_cast<uint8_t*>(Y.template MutableData<T>()),
+      static_cast<size_t>(N));
+
+  return Status::OK();
+}
+
+// Registers KERNEL_CLASS<data_type> for op_name with type constraint "T" = tensor(data_type).
+#define REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(op_name, version, data_type, KERNEL_CLASS) \
+  ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(                                                         \
+      op_name, version, data_type,                                                           \
+      KernelDefBuilder()                                                                     \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()),                    \
+      KERNEL_CLASS<data_type>);
+
+REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, int8_t, QLinearLeakyRelu);
+REGISTER_QLINEAR_LOOKUPTABLE_TYPED_KERNEL(QLinearLeakyRelu, 1, uint8_t, QLinearLeakyRelu);
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h b/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h
new file mode 100644
index 0000000000..214f56f6f4
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/qlinear_lookup_table.h
@@ -0,0 +1,28 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+// CPU kernel for the QLinearLeakyRelu contrib operator on 8-bit quantized tensors (T is uint8_t or int8_t).
+// The operator is evaluated through a 256-entry lookup table indexed by the input byte.
+template <typename T>
+class QLinearLeakyRelu final : public OpKernel {
+ public:
+  explicit QLinearLeakyRelu(const OpKernelInfo& info);
+
+  Status Compute(OpKernelContext* context) const override;
+
+ private:
+  const float alpha_;                // value of the "alpha" attribute
+  bool is_fixed_parameters_;         // true when scale and zero point of both X and Y are constant
+  uint8_t fixed_lookup_table_[256];  // holds the table when is_fixed_parameters_ is true
+};
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 047dec091e..ebe34fbd29 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -2078,6 +2078,37 @@ Output = Dequantize(Input) -> AveragePool on fp32 data -> Quantize(output)
         ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
       });
 
+  const char* QLinearLeakyReluDoc_ver1 = R"DOC(
+QLinearLeakyRelu takes quantized input data (Tensor<T>), an argument alpha, and quantize parameter for output,
+and produces one output data (Tensor<T>) where the function `f(x) = quantize(alpha * dequantize(x)) for dequantize(x) < 0`,
+`f(x) = quantize(dequantize(x)) for dequantize(x) >= 0`, is applied to the data tensor elementwise.
+)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearLeakyRelu)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(QLinearLeakyReluDoc_ver1)
+      .Attr("alpha", "Coefficient of leakage.", AttributeProto::FLOAT, 0.01f)
+      .Input(0, "X", "Input tensor", "T")
+      .Input(1, "X_scale",
+             "Input X's scale. It's a scalar, which means a per-tensor/layer quantization.",
+             "tensor(float)")
+      .Input(2, "X_zero_point",
+             "Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
+             "T", OpSchema::Optional)
+      .Input(3, "Y_scale",
+             "Output Y's scale. It's a scalar, which means a per-tensor/layer quantization.",
+             "tensor(float)")
+      .Input(4, "Y_zero_point",
+             "Output Y's zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
+             "T", OpSchema::Optional)
+      .Output(0, "Y", "Output tensor", "T")
+      .TypeConstraint(
+          "T",
+          {"tensor(uint8)", "tensor(int8)"},
+          "Constrain input and output types to 8 bit tensors.")
+      .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput);
+
   ONNX_CONTRIB_OPERATOR_SCHEMA(MurmurHash3)
       .SetDomain(kMSDomain)
       .SinceVersion(1)
diff --git a/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc b/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
new file mode 100644
index 0000000000..2c7a642bcd
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+#include <cfenv>
+
+namespace onnxruntime {
+namespace test {
+
+TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_Int8) {
+  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("alpha", 0.1f);
+  float X_scale = 0.25f;
+  // X_zero_point is intentionally omitted below, so the operator uses its default of 0.
+  float Y_scale = 0.1f;
+  int8_t Y_zero_point = -100;
+
+  std::vector<int64_t> dims = {16};
+  test.AddInput<int8_t>("X", dims, {0, 16, 17, 18, 19, 90, 91, 127, -128, -110, -108, -100, -16, -17, -18, -1});
+  test.AddInput<float>("X_scale", {}, {X_scale});
+  test.AddMissingOptionalInput<int8_t>();  // optional "X_zero_point" using default value here
+  test.AddInput<float>("Y_scale", {}, {Y_scale});
+  test.AddInput<int8_t>("Y_zero_point", {}, {Y_zero_point});
+  test.AddOutput<int8_t>("Y", dims, {-100, -60, -58, -55, -52, 125, 127, 127, -128, -128, -127, -125, -104, -104, -104, -100});
+  // Pin round-to-nearest for the run and restore the caller's rounding mode afterwards.
+  auto origin_round_mode = std::fegetround();
+  std::fesetround(FE_TONEAREST);
+  test.Run();
+  std::fesetround(origin_round_mode);
+}
+
+
+TEST(QLinearLookupTableBasedOperatorTests, QLinearLeakyRelu_UInt8) {
+  OpTester test("QLinearLeakyRelu", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("alpha", 0.1f);
+  float X_scale = 0.25f;
+  uint8_t X_zero_point = 128;
+  float Y_scale = 0.1f;
+  uint8_t Y_zero_point = 30;
+
+  std::vector<int64_t> dims = {16};
+  test.AddInput<uint8_t>("X", dims, {0, 16, 17, 18, 19, 90, 91, 127, 128, 136, 137, 138, 216, 217, 218, 255});
+  test.AddInput<float>("X_scale", {}, {X_scale});
+  test.AddInput<uint8_t>("X_zero_point", {}, {X_zero_point});
+  test.AddInput<float>("Y_scale", {}, {Y_scale});
+  test.AddInput<uint8_t>("Y_zero_point", {}, {Y_zero_point});
+  test.AddOutput<uint8_t>("Y", dims, {0, 2, 2, 2, 3, 20, 21, 30, 30, 50, 52, 55, 250, 252, 255, 255});
+  // Pin round-to-nearest for the run and restore the caller's rounding mode afterwards.
+  auto origin_round_mode = std::fegetround();
+  std::fesetround(FE_TONEAREST);
+  test.Run();
+  std::fesetround(origin_round_mode);
+}
+
+}  // namespace test
+}  // namespace onnxruntime