From d32c540b2d1f3c0b60f36da1bc8f59f4c99cc29e Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Sat, 29 Apr 2023 13:23:42 -0700 Subject: [PATCH] [QNN EP] Support LRN operator (#15741) ### Description Adds support for the LRN operator to QNN EP. ### Motivation and Context Enables basic models like googlenet and alexnet to run entirely on QNN EP. --- .../selectors_actions/shared/utils.cc | 3 +- .../qnn/builder/op_builder_factory.cc | 4 + .../qnn/builder/op_builder_factory.h | 2 + .../qnn/builder/opbuilder/base_op_builder.h | 7 +- .../qnn/builder/opbuilder/lrn_op_builder.cc | 185 ++++++++++++++++++ .../qnn/builder/qnn_model_wrapper.cc | 4 +- onnxruntime/test/providers/qnn/lrn_op_test.cc | 152 ++++++++++++++ 7 files changed, 352 insertions(+), 5 deletions(-) create mode 100644 onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc create mode 100644 onnxruntime/test/providers/qnn/lrn_op_test.cc diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc index 895524aea3..22eb2b0538 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc @@ -58,7 +58,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() { {"Sqrt", {}}, {"Atan", {}}, {"Tanh", {}}, - {"Exp", {}}}; + {"Exp", {}}, + {"LRN", {}}}; } static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() { return {{"Add", {}}, diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc index 36767f5ed3..b2636861b2 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc @@ -138,6 +138,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() { { CreateBatchNormOpBuilder("BatchNormalization", *this); } + + { + CreateLRNOpBuilder("LRN", *this); + } } const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type) { diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h index 73a4e3344e..6205268714 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h @@ -84,5 +84,7 @@ void CreateReduceOpBuilder(const std::string& op_type, OpBuilderRegistrations& o void CreateBatchNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); +void CreateLRNOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); + } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index ce1fd90104..00def0eb1c 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -8,6 +8,8 @@ #include "core/providers/qnn/builder/op_builder.h" #include "core/framework/allocator.h" +#include "QnnOpDef.h" + namespace onnxruntime { namespace qnn { @@ -91,6 +93,7 @@ class BaseOpBuilder : public IOpBuilder { } static const std::string& GetQnnOpType(const std::string& onnx_op_type) { + // TODO: Use QNN operator names defined in "QnnOpDef.h" static const std::unordered_map onnx_op_type_to_qnn_op_type = { {"Add", "ElementWiseAdd"}, {"Mul", "ElementWiseMultiply"}, @@ -171,7 +174,9 @@ class BaseOpBuilder : public IOpBuilder { {"Tile", "Tile"}, {"TopK", "TopK"}, {"InstanceNormalization", "InstanceNorm"}, - {"BatchNormalization", "Batchnorm"}}; + {"BatchNormalization", "Batchnorm"}, + + {"LRN", QNN_OP_LRN}}; auto it = onnx_op_type_to_qnn_op_type.find(onnx_op_type); ORT_ENFORCE(it != onnx_op_type_to_qnn_op_type.end()); return it->second; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc new file mode 100644 index 0000000000..3436207270 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc @@ -0,0 +1,185 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/qnn_model_wrapper.h" +#include "core/providers/qnn/builder/op_builder_factory.h" +#include "core/common/safeint.h" +#include "onnx/defs/data_type_utils.h" + +#include "QnnOpDef.h" // From QNN SDK: contains QNN constants (e.g., op names, param values). + +namespace onnxruntime { +namespace qnn { + +class LRNOpBuilder : public BaseOpBuilder { + public: + LRNOpBuilder() : BaseOpBuilder("LRNOpBuilder") {} + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(LRNOpBuilder); + + Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const logging::Logger& logger, + bool is_quantized_model) const override final ORT_MUST_USE_RESULT; + + protected: + Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + const logging::Logger& logger, + bool is_quantized_model, + bool do_op_validation) const override ORT_MUST_USE_RESULT; + + private: + static const OnnxAttrInfo onnx_alpha_attr; + static const OnnxAttrInfo onnx_beta_attr; + static const OnnxAttrInfo onnx_bias_attr; + static const OnnxAttrInfo onnx_size_attr; +}; + +const OnnxAttrInfo LRNOpBuilder::onnx_alpha_attr = {"alpha", 0.0001f}; +const OnnxAttrInfo LRNOpBuilder::onnx_beta_attr = {"beta", 0.75f}; +const OnnxAttrInfo LRNOpBuilder::onnx_bias_attr = {"bias", 1.0f}; +const OnnxAttrInfo LRNOpBuilder::onnx_size_attr = {"size", 0}; + +// The LRN operator is layout sensitive. ONNX LRN has layout NCHW, but QNN requires layout NHWC. +// The nodes from 1st call of GetCapability do not get layout transformer applied, so their shapes are still NCHW. +// The nodes from 2nd call of GetCapability get their layout transformed to NHWC. +// Therefore, we need to check the node domain to determine if the layout has been transformed. +Status LRNOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const logging::Logger& logger, + bool is_quantized_model) const { + if (node_unit.Domain() == kMSInternalNHWCDomain) { + return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, is_quantized_model, true); + } + + const auto float_elem_type = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"); + const auto& inputs = node_unit.Inputs(); + const auto& outputs = node_unit.Outputs(); + + ORT_RETURN_IF(inputs.size() != 1, "QNN EP: LRN operator must have 1 input."); + ORT_RETURN_IF(outputs.size() != 1, "QNN EP: LRN operator must have 1 output."); + + const auto& input = inputs[0]; + const auto& output = outputs[0]; + + // Check that the input type is float for CPU. + ONNX_NAMESPACE::DataType input_data_type = input.node_arg.Type(); + ORT_RETURN_IF(!is_quantized_model && input_data_type != float_elem_type, + "QNN EP: LRN operator does not support the input type '", input_data_type->c_str(), + "' on the CPU backend."); + + // Check that the output type is float for CPU. + ONNX_NAMESPACE::DataType output_data_type = output.node_arg.Type(); + ORT_RETURN_IF(!is_quantized_model && output_data_type != float_elem_type, + "QNN EP: LRN operator does not support the input type '", input_data_type->c_str(), + "' on the CPU backend."); + + // Check that the input and output have the same shape. + std::vector input_shape; + ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input.node_arg, input_shape), "Cannot get shape of input 0"); + const size_t input_rank = input_shape.size(); + + ORT_RETURN_IF(input_rank <= 2 || input_rank > 4, "QNN EP: LRN operator only supports input ranks of size 3 or 4."); + + std::vector output_shape; + ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output.node_arg, output_shape), "Cannot get shape of output 0"); + + ORT_RETURN_IF(output_shape != input_shape, "QNN EP: LRN operator's output must have the same shape as the input."); + + NodeAttrHelper node_helper(node_unit); + + // 'size' attribute must be odd and > 0. + const int64_t onnx_size = GetOnnxAttr(node_helper, onnx_size_attr); + ORT_RETURN_IF(onnx_size % 2 == 0, "QNN EP: LRN operator's size attribute must be odd."); + + // 'alpha' attribute must be > 0.0f. + const float onnx_alpha = GetOnnxAttr(node_helper, onnx_alpha_attr); + ORT_RETURN_IF(onnx_alpha <= 0.0f, "QNN EP: LRN operator's alpha attribute must be greater than zero."); + + // 'alpha' attribute must be > 0.0f. + const float onnx_beta = GetOnnxAttr(node_helper, onnx_beta_attr); + ORT_RETURN_IF(onnx_beta <= 0.0f, "QNN EP: LRN operator's beta attribute must be greater than zero."); + + return Status::OK(); +} + +Status LRNOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + const logging::Logger& logger, + bool is_quantized_model, + bool do_op_validation) const { + std::vector param_tensor_names; + NodeAttrHelper node_helper(node_unit); + + const int64_t onnx_size = GetOnnxAttr(node_helper, onnx_size_attr); + + // Parameter 'radius' + { + Qnn_Scalar_t qnn_radius = QNN_SCALAR_INIT; + qnn_radius.dataType = QNN_DATATYPE_INT_32; + qnn_radius.int32Value = SafeInt((onnx_size - 1) / 2); // Convert ONNX size into QNN radius. + + QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_LRN_PARAM_RADIUS, qnn_radius); + param_tensor_names.push_back(qnn_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(qnn_param)); + } + + // Parameter 'alpha' + { + float onnx_alpha = GetOnnxAttr(node_helper, onnx_alpha_attr); + Qnn_Scalar_t qnn_alpha = QNN_SCALAR_INIT; + qnn_alpha.dataType = QNN_DATATYPE_FLOAT_32; + qnn_alpha.floatValue = onnx_alpha / static_cast(onnx_size); // QNN doesn't scale alpha by size. + + QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_LRN_PARAM_ALPHA, qnn_alpha); + param_tensor_names.push_back(qnn_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(qnn_param)); + } + + // Parameter 'beta' + { + Qnn_Scalar_t qnn_beta = QNN_SCALAR_INIT; + qnn_beta.dataType = QNN_DATATYPE_FLOAT_32; + qnn_beta.floatValue = GetOnnxAttr(node_helper, onnx_beta_attr); + + QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_LRN_PARAM_BETA, qnn_beta); + param_tensor_names.push_back(qnn_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(qnn_param)); + } + + // Parameter 'bias' + { + Qnn_Scalar_t qnn_bias = QNN_SCALAR_INIT; + qnn_bias.dataType = QNN_DATATYPE_FLOAT_32; + qnn_bias.floatValue = GetOnnxAttr(node_helper, onnx_bias_attr); + + QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_LRN_PARAM_BIAS, qnn_bias); + param_tensor_names.push_back(qnn_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(qnn_param)); + } + + // Parameter 'region' + { + Qnn_Scalar_t qnn_region = QNN_SCALAR_INIT; + qnn_region.dataType = QNN_DATATYPE_UINT_32; + qnn_region.uint32Value = QNN_OP_LRN_REGION_ACROSS_CHANNEL; // ONNX's LRN only supports "across channel". + + QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_LRN_PARAM_REGION, qnn_region); + param_tensor_names.push_back(qnn_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(qnn_param)); + } + + return ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names), + logger, is_quantized_model, do_op_validation); +} + +void CreateLRNOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { + op_registrations.AddOpBuilder(op_type, std::make_unique()); +} + +} // namespace qnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 3a10ee0edd..62a199efee 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -424,11 +424,9 @@ Status QnnModelWrapper::UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& std::vector& unpacked_tensor) const { if (initializer.data_location() == onnx::TensorProto_DataLocation_EXTERNAL) { return onnxruntime::utils::UnpackInitializerData(initializer, graph_viewer_.ModelPath(), unpacked_tensor); - } else { - return onnxruntime::utils::UnpackInitializerData(initializer, unpacked_tensor); } - return Status::OK(); + return onnxruntime::utils::UnpackInitializerData(initializer, unpacked_tensor); } } // namespace qnn diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc new file mode 100644 index 0000000000..8bf901cd56 --- /dev/null +++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#if !defined(ORT_MINIMAL_BUILD) + +#include +#include + +#include "test/optimizer/qdq_test_utils.h" +#include "test/providers/qnn/qnn_test_utils.h" + +#include "onnx/onnx_pb.h" + +#include "gtest/gtest.h" + +namespace onnxruntime { +namespace test { + +// Creates a graph with a single LRN operator. Used for testing CPU backend. +static GetTestModelFn BuildLRNTestCase(const std::vector& shape, int64_t size, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { + return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { + auto* input = builder.MakeInput(shape, 0.0f, 20.0f); + auto* output = builder.MakeOutput(); + + Node& lrn_node = builder.AddNode("LRN", {input}, {output}); + lrn_node.AddAttribute("size", size); + lrn_node.AddAttribute("alpha", alpha); + lrn_node.AddAttribute("beta", beta); + lrn_node.AddAttribute("bias", bias); + }; +} + +// Q/DQ scaled used to build Q/DQ test model. This is a global constant +// because results from HTP backend are off by exactly this amount. +static constexpr float qdq_scale = 0.0038f; + +// Creates a graph with a single Q/DQ LRN operator. Used for testing HTP backend. +template +static GetTestModelFn BuildQDQLRNTestCase(const std::vector& shape, int64_t size, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { + return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { + const InputQType zero_point = std::numeric_limits::max() / 2; + + auto* input = builder.MakeInput(shape, -1.0f, 1.0f); + auto* output = builder.MakeOutput(); + + // input -> Q -> DQ -> LRN + auto* qdq_output = AddQDQNodePair(builder, input, qdq_scale, zero_point); + auto* lrn_output = builder.MakeIntermediate(); + + Node& lrn_node = builder.AddNode("LRN", {qdq_output}, {lrn_output}); + lrn_node.AddAttribute("size", size); + lrn_node.AddAttribute("alpha", alpha); + lrn_node.AddAttribute("beta", beta); + lrn_node.AddAttribute("bias", bias); + + // -> Q -> DQ -> output + auto* q_output = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(lrn_output, qdq_scale, zero_point, q_output); + builder.AddDequantizeLinearNode(q_output, qdq_scale, zero_point, output); + }; +} + +// Runs an LRN model on the QNN CPU backend. Checks the graph node assignment, and that inference +// outputs for QNN EP and CPU EP match. +static void RunCPULRNOpTest(const std::vector& shape, int64_t size, + ExpectedEPNodeAssignment expected_ep_assignment, const char* test_description, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, int opset = 13) { + ProviderOptions provider_options; + float fp32_abs_err = 1e-5f; // default tolerance + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; + fp32_abs_err = 1.5e-5f; // On linux we need slightly larger tolerance. +#endif + + constexpr int expected_nodes_in_partition = 1; + RunQnnModelTest(BuildLRNTestCase(shape, size, alpha, beta, bias), + provider_options, + opset, + expected_ep_assignment, + expected_nodes_in_partition, + test_description, + fp32_abs_err); +} + +// Runs an LRN model on the QNN HTP backend. Checks the graph node assignment, and that inference +// outputs for QNN EP and CPU EP match. +template +static void RunQDQLRNOpTest(const std::vector& shape, int64_t size, + ExpectedEPNodeAssignment expected_ep_assignment, const char* test_description, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, + int opset = 13, float fp32_abs_err = qdq_scale) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + constexpr int expected_nodes_in_partition = 1; + RunQnnModelTest(BuildQDQLRNTestCase(shape, size, alpha, beta, bias), + provider_options, + opset, + expected_ep_assignment, + expected_nodes_in_partition, + test_description, + fp32_abs_err + 0.0001f); +} + +// +// CPU tests: +// + +TEST_F(QnnCPUBackendTests, TestCPULRNSize3) { + RunCPULRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All, "TestCPULRNSize3"); +} + +TEST_F(QnnCPUBackendTests, TestCPULRNSize5) { + RunCPULRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All, "TestCPULRNSize5"); +} + +TEST_F(QnnCPUBackendTests, TestCPULRN_size_larger_than_channel) { + RunCPULRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All, "TestCPULRN_size_larger_than_channel"); +} + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +// +// HTP tests: +// + +TEST_F(QnnHTPBackendTests, TestHTPLRNSize3) { + RunQDQLRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All, "TestHTPLRNSize3"); +} + +TEST_F(QnnHTPBackendTests, TestHTPLRNSize5) { + RunQDQLRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All, "TestHTPLRNSize5"); +} + +TEST_F(QnnHTPBackendTests, TestHTPLRN_size_larger_than_channel) { + RunQDQLRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All, "TestHTPLRN_size_larger_than_channel"); +} + +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) + +} // namespace test +} // namespace onnxruntime + +#endif // !defined(ORT_MINIMAL_BUILD)