[QNN EP] Op support: LayerNorm, Asin, Sign (#16740)

### Description
Add op support for LayerNorm, Asin, Sign.
Enable QDQ node unit support for Sin Op

---------

Co-authored-by: Adrian Lizarraga <adlizarraga@microsoft.com>
This commit is contained in:
Hector Li 2023-07-20 20:57:48 -07:00 committed by GitHub
parent b508c7236f
commit 4d569f6586
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 382 additions and 86 deletions

View file

@ -330,23 +330,29 @@ bool WhereNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node&
dt_input_1 == dt_output;
}
bool InstanceNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
const Node& node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const {
bool InstanceAndLayerNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
const Node& node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const {
if (!CheckQDQNodes(graph_viewer, node, dq_nodes, q_nodes)) {
return false;
}
int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_scale = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_bias = 0;
bool has_bias = false;
// bias is optional for LayerNorm
if (dq_nodes.size() > 2) {
has_bias = true;
dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
}
int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
// Input, output, and scale need to be the same type. The bias is int32.
return (dt_input == dt_output) &&
(dt_input == dt_scale) &&
(dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32);
(has_bias ? dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32 : true);
}
bool BatchNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,

View file

@ -139,7 +139,7 @@ class GemmNodeGroupSelector : public NodeGroupSelector {
// Input: DQ nodes for input, scale, and B
// Output: Q node for output
class InstanceNormalizationNodeGroupSelector : public NodeGroupSelector {
class InstanceAndLayerNormalizationNodeGroupSelector : public NodeGroupSelector {
private:
bool Check(const GraphViewer& graph_viewer, const Node& node,
const std::vector<const Node*>& dq_nodes,
@ -264,7 +264,7 @@ class GemmSelector : public BaseSelector {
class InstanceNormalizationSelector : public BaseSelector {
public:
InstanceNormalizationSelector()
: BaseSelector(std::make_unique<InstanceNormalizationNodeGroupSelector>()) {}
: BaseSelector(std::make_unique<InstanceAndLayerNormalizationNodeGroupSelector>()) {}
};
// DQ nodes for X, W and optionally B, (mean, var not required) -> node -> Q

View file

@ -62,6 +62,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Softmax", {}},
{"Sqrt", {}},
{"Atan", {}},
{"Asin", {}},
{"Sin", {}},
{"Sign", {}},
{"Tanh", {}},
{"Exp", {}},
{"LRN", {}}};
@ -88,8 +91,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetMatMulOpVersionsMap() {
static const OpVersionsAndSelector::OpVersionsMap GetGemmOpVersionsMap() {
return {{"Gemm", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetInstanceNormalizationOpVersionsMap() {
return {{"InstanceNormalization", {}}};
static const OpVersionsAndSelector::OpVersionsMap GetInstanceAndLayerNormalizationOpVersionsMap() {
return {{"InstanceNormalization", {}},
{"LayerNormalization", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBatchNormalizationOpVersionsMap() {
return {{"BatchNormalization", {}}};
@ -167,10 +171,10 @@ void RegisterGemmSelector(Selectors& qdq_selectors) {
std::move(selector));
}
void RegisterInstanceNormalizationSelector(Selectors& qdq_selectors) {
void RegisterInstanceAndLayerNormalizationSelector(Selectors& qdq_selectors) {
/* register selector for InstanceNormalization op */
std::unique_ptr<NodeGroupSelector> selector = std::make_unique<InstanceNormalizationNodeGroupSelector>();
qdq_selectors.RegisterSelector(GetInstanceNormalizationOpVersionsMap(),
std::unique_ptr<NodeGroupSelector> selector = std::make_unique<InstanceAndLayerNormalizationNodeGroupSelector>();
qdq_selectors.RegisterSelector(GetInstanceAndLayerNormalizationOpVersionsMap(),
std::move(selector));
}
@ -198,7 +202,7 @@ void SelectorManager::CreateSelectors() {
RegisterConvTransposeSelector(qdq_selectors_);
RegisterMatMulSelector(qdq_selectors_);
RegisterGemmSelector(qdq_selectors_);
RegisterInstanceNormalizationSelector(qdq_selectors_);
RegisterInstanceAndLayerNormalizationSelector(qdq_selectors_);
RegisterBatchNormalizationSelector(qdq_selectors_);
RegisterLogicalComparisonSelectors(qdq_selectors_);
}

View file

@ -15,12 +15,14 @@ namespace qnn {
OpBuilderRegistrations::OpBuilderRegistrations() {
{
CreateSimpleOpBuilder("Add", *this);
CreateSimpleOpBuilder("Asin", *this);
CreateSimpleOpBuilder("Atan", *this);
CreateSimpleOpBuilder("Mul", *this);
CreateSimpleOpBuilder("Abs", *this);
CreateSimpleOpBuilder("And", *this);
CreateSimpleOpBuilder("Ceil", *this);
CreateSimpleOpBuilder("Cos", *this);
CreateSimpleOpBuilder("Sign", *this);
CreateSimpleOpBuilder("Div", *this);
CreateSimpleOpBuilder("Equal", *this);
CreateSimpleOpBuilder("Exp", *this);
@ -136,6 +138,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
CreateBatchNormOpBuilder("BatchNormalization", *this);
}
{
CreateLayerNormOpBuilder("LayerNormalization", *this);
}
{
CreateLRNOpBuilder("LRN", *this);
}

View file

@ -82,6 +82,8 @@ void CreateReduceOpBuilder(const std::string& op_type, OpBuilderRegistrations& o
void CreateBatchNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateLayerNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateLRNOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
} // namespace qnn

View file

@ -81,86 +81,89 @@ class BaseOpBuilder : public IOpBuilder {
static const std::string& GetQnnOpType(const std::string& onnx_op_type) {
// TODO: Use QNN operator names defined in "QnnOpDef.h"
static const std::unordered_map<std::string, std::string> onnx_op_type_to_qnn_op_type = {
{"Add", "ElementWiseAdd"},
{"Mul", "ElementWiseMultiply"},
{"Abs", "ElementWiseAbs"},
{"And", "ElementWiseAnd"},
{"Atan", "ElementWiseAtan"},
{"Ceil", "ElementWiseCeil"},
{"Cast", "Cast"},
{"Clip", "ReluMinMax"},
{"Cos", "ElementWiseCos"},
{"Div", "ElementWiseDivide"},
{"Equal", "ElementWiseEqual"},
{"Exp", "ElementWiseExp"},
{"Floor", "ElementWiseFloor"},
{"Gather", "Gather"},
{"Greater", "ElementWiseGreater"},
{"GreaterOrEqual", "ElementWiseGreaterEqual"},
{"Less", "ElementWiseLess"},
{"LessOrEqual", "ElementWiseLessEqual"},
{"Log", "ElementWiseLog"},
{"Max", "ElementWiseMaximum"},
{"Min", "ElementWiseMinimum"},
{"Neg", "ElementWiseNeg"},
{"Not", "ElementWiseNot"},
{"Or", "ElementWiseOr"},
{"Pow", "ElementWisePower"},
{"PRelu", "Prelu"},
{"LeakyRelu", "Prelu"},
{"ReduceMax", "ReduceMax"},
{"ReduceMean", "ReduceMean"},
{"ReduceMin", "ReduceMin"},
{"ReduceProd", "ReduceProd"},
{"ReduceSum", "ReduceSum"},
{"Round", "ElementWiseRound"},
{"Where", "ElementWiseSelect"},
{"Sigmoid", "Sigmoid"},
{"Sin", "ElementWiseSin"},
{"Slice", "StridedSlice"},
{"Split", "Split"},
{"Softmax", "Softmax"},
{"Sqrt", "ElementWiseSquareRoot"},
{"Sub", "ElementWiseSubtract"},
{"Tanh", "Tanh"},
{"Transpose", "Transpose"},
{"Add", QNN_OP_ELEMENT_WISE_ADD},
{"Mul", QNN_OP_ELEMENT_WISE_MULTIPLY},
{"Abs", QNN_OP_ELEMENT_WISE_ABS},
{"And", QNN_OP_ELEMENT_WISE_AND},
{"Asin", QNN_OP_ELEMENT_WISE_ASIN},
{"Atan", QNN_OP_ELEMENT_WISE_ATAN},
{"Ceil", QNN_OP_ELEMENT_WISE_CEIL},
{"Sign", QNN_OP_ELEMENT_WISE_SIGN},
{"Cast", QNN_OP_CAST},
{"Clip", QNN_OP_RELU_MIN_MAX},
{"Cos", QNN_OP_ELEMENT_WISE_COS},
{"Div", QNN_OP_ELEMENT_WISE_DIVIDE},
{"Equal", QNN_OP_ELEMENT_WISE_EQUAL},
{"Exp", QNN_OP_ELEMENT_WISE_EXP},
{"Floor", QNN_OP_ELEMENT_WISE_FLOOR},
{"Gather", QNN_OP_GATHER},
{"Greater", QNN_OP_ELEMENT_WISE_GREATER},
{"GreaterOrEqual", QNN_OP_ELEMENT_WISE_GREATER_EQUAL},
{"Less", QNN_OP_ELEMENT_WISE_LESS},
{"LessOrEqual", QNN_OP_ELEMENT_WISE_LESS_EQUAL},
{"Log", QNN_OP_ELEMENT_WISE_LOG},
{"Max", QNN_OP_ELEMENT_WISE_MAXIMUM},
{"Min", QNN_OP_ELEMENT_WISE_MINIMUM},
{"Neg", QNN_OP_ELEMENT_WISE_NEG},
{"Not", QNN_OP_ELEMENT_WISE_NOT},
{"Or", QNN_OP_ELEMENT_WISE_OR},
{"Pow", QNN_OP_ELEMENT_WISE_POWER},
{"PRelu", QNN_OP_PRELU},
{"LeakyRelu", QNN_OP_PRELU},
{"ReduceMax", QNN_OP_REDUCE_MAX},
{"ReduceMean", QNN_OP_REDUCE_MEAN},
{"ReduceMin", QNN_OP_REDUCE_MIN},
{"ReduceProd", QNN_OP_REDUCE_PROD},
{"ReduceSum", QNN_OP_REDUCE_SUM},
{"Round", QNN_OP_ELEMENT_WISE_ROUND},
{"Where", QNN_OP_ELEMENT_WISE_SELECT},
{"Sigmoid", QNN_OP_SIGMOID},
{"Sin", QNN_OP_ELEMENT_WISE_SIN},
{"Slice", QNN_OP_STRIDED_SLICE},
{"Split", QNN_OP_SPLIT},
{"Softmax", QNN_OP_SOFTMAX},
{"Sqrt", QNN_OP_ELEMENT_WISE_SQUARE_ROOT},
{"Sub", QNN_OP_ELEMENT_WISE_SUBTRACT},
{"Tanh", QNN_OP_TANH},
{"Transpose", QNN_OP_TRANSPOSE},
{"DequantizeLinear", "Dequantize"},
{"QuantizeLinear", "Quantize"},
{"DequantizeLinear", QNN_OP_DEQUANTIZE},
{"QuantizeLinear", QNN_OP_QUANTIZE},
{"MatMul", "MatMul"},
{"MatMul", QNN_OP_MAT_MUL},
{"Elu", "Elu"},
{"Relu", "Relu"},
{"Gelu", "Gelu"},
{"Sigmoid", "Sigmoid"},
{"Elu", QNN_OP_ELU},
{"Relu", QNN_OP_RELU},
{"Gelu", QNN_OP_GELU},
{"Sigmoid", QNN_OP_SIGMOID},
{"HardSwish", "HardSwish"},
{"HardSwish", QNN_OP_HARD_SWISH},
{"Conv", "Conv2d"},
{"ConvTranspose", "TransposeConv2d"},
{"Conv", QNN_OP_CONV_2D},
{"ConvTranspose", QNN_OP_TRANSPOSE_CONV_2D},
{"GlobalAveragePool", "PoolAvg2d"},
{"AveragePool", "PoolAvg2d"},
{"MaxPool", "PoolMax2d"},
{"GlobalAveragePool", QNN_OP_POOL_AVG_2D},
{"AveragePool", QNN_OP_POOL_AVG_2D},
{"MaxPool", QNN_OP_POOL_MAX_2D},
{"Reshape", "Reshape"},
{"Resize", "Resize"},
{"Flatten", "Reshape"},
{"Squeeze", "Reshape"},
{"Unsqueeze", "Reshape"},
{"Reshape", QNN_OP_RESHAPE},
{"Resize", QNN_OP_RESIZE},
{"Flatten", QNN_OP_RESHAPE},
{"Squeeze", QNN_OP_RESHAPE},
{"Unsqueeze", QNN_OP_RESHAPE},
{"LogSoftmax", "LogSoftmax"},
{"Concat", "Concat"},
{"LogSoftmax", QNN_OP_LOG_SOFTMAX},
{"Concat", QNN_OP_CONCAT},
{"Gemm", "FullyConnected"},
{"Gemm", QNN_OP_FULLY_CONNECTED},
{"ArgMax", "Argmax"},
{"ArgMin", "Argmin"},
{"Tile", "Tile"},
{"TopK", "TopK"},
{"InstanceNormalization", "InstanceNorm"},
{"BatchNormalization", "Batchnorm"},
{"ArgMax", QNN_OP_ARGMAX},
{"ArgMin", QNN_OP_ARGMIN},
{"Tile", QNN_OP_TILE},
{"TopK", QNN_OP_TOP_K},
{"InstanceNormalization", QNN_OP_INSTANCE_NORM},
{"BatchNormalization", QNN_OP_BATCHNORM},
{"LayerNormalization", QNN_OP_LAYER_NORM},
{"LRN", QNN_OP_LRN}};
auto it = onnx_op_type_to_qnn_op_type.find(onnx_op_type);
@ -262,7 +265,8 @@ class BaseOpBuilder : public IOpBuilder {
static const std::unordered_map<std::string, std::pair<size_t, size_t>> input_output_count_qnn_required = {
{"GlobalAveragePool", {0, 1}},
{"MaxPool", {0, 1}},
{"BatchNormalization", {3, 1}}};
{"BatchNormalization", {3, 1}},
{"LayerNormalization", {0, 1}}};
auto pos = input_output_count_qnn_required.find(onnx_op_type);
if (pos == input_output_count_qnn_required.end()) {

View file

@ -0,0 +1,112 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
#include "core/framework/tensorprotoutils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/common/safeint.h"
#include "onnx/defs/data_type_utils.h"
#include "base_op_builder.h"
namespace onnxruntime {
namespace qnn {
class LayerNormOpBuilder : public BaseOpBuilder {
public:
LayerNormOpBuilder() : BaseOpBuilder("LayerNormOpBuilder") {}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(LayerNormOpBuilder);
Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
bool is_quantized_model) const override final ORT_MUST_USE_RESULT;
protected:
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
};
// Instance normalization op is sensitive to data layout.
// The nodes from 1st call of GetCapability do not get layout transformer applied, so their shapes are still NCHW.
// The nodes from 2nd call of GetCapability get their layout transformed to NHWC.
// Therefore, we need to check the node domain to determine if the layout has been transformed.
Status LayerNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
bool is_quantized_model) const {
const auto float_elem_type = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float");
// Check input type is float for CPU.
const auto& inputs = node_unit.Inputs();
ONNX_NAMESPACE::DataType input_data_type = inputs[0].node_arg.Type();
ORT_RETURN_IF(!is_quantized_model && input_data_type != float_elem_type, "QNN LayerNorm data type ", input_data_type->c_str(), " is not supported in CPU backend.");
// Also check output type is float for CPU.
const auto& outputs = node_unit.Outputs();
ONNX_NAMESPACE::DataType output_data_type = outputs[0].node_arg.Type();
ORT_RETURN_IF(!is_quantized_model && output_data_type != float_elem_type, "QNN LayerNorm data type ", output_data_type->c_str(), " is not supported in CPU backend.");
ORT_RETURN_IF(outputs.size() > 1, "QNN LayerNorm only support 1 output.");
return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, is_quantized_model, true);
}
Status LayerNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const {
NodeAttrHelper node_helper(node_unit);
std::vector<std::string> param_tensor_names;
const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec.
Qnn_Scalar_t epsilon_param = QNN_SCALAR_INIT;
epsilon_param.dataType = QNN_DATATYPE_FLOAT_32;
epsilon_param.floatValue = epsilon;
QnnParamWrapper epsilon_param_wrapper(node_unit.Index(),
node_unit.Name(),
QNN_OP_LAYER_NORM_PARAM_EPSILON,
epsilon_param);
param_tensor_names.push_back(epsilon_param_wrapper.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));
std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape), "Cannot get shape of input 0");
const size_t input_rank = input_shape.size();
int32_t default_axis = -1;
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
size_t axes_rank = input_rank - static_cast<size_t>(default_axis);
std::vector<uint32_t> axes(axes_rank, 0);
std::vector<uint32_t> axes_shape{SafeInt<uint32_t>(axes_rank)};
axes[0] = static_cast<uint32_t>(default_axis);
for (size_t i = 1; i < axes.size(); ++i) {
axes[i] = axes[i - 1] + 1;
}
QnnParamWrapper axes_param(node_unit.Index(), node_unit.Name(), QNN_OP_LAYER_NORM_PARAM_AXES,
std::move(axes_shape), std::move(axes));
param_tensor_names.push_back(axes_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(axes_param));
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}
void CreateLayerNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
op_registrations.AddOpBuilder(op_type, std::make_unique<LayerNormOpBuilder>());
}
} // namespace qnn
} // namespace onnxruntime

View file

@ -439,6 +439,7 @@ typedef struct GraphConfigInfo {
namespace qnn_def {
const std::string package_name = "qti.aisw";
// TODO: remove these parameter name, re-use from QnnOpDef.h
const std::string dilation = "dilation";
const std::string pad_amount = "pad_amount";
const std::string stride = "stride";

View file

@ -140,7 +140,8 @@ TEST(MathOpTest, Sign_int64) {
std::vector<int64_t> output;
TestImpl<int64_t>(input.cbegin(), input.cend(), std::back_inserter(output));
test.AddOutput<int64_t>("output", input_dims, output);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
// TODO: QNN execute error, need further investigation
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kQnnExecutionProvider});
}
TEST(MathOpTest, Sign_float) {

View file

@ -0,0 +1,139 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#if !defined(ORT_MINIMAL_BUILD)
#include <string>
#include "core/graph/graph.h"
#include "test/optimizer/qdq_test_utils.h"
#include "test/providers/qnn/qnn_test_utils.h"
#include "gtest/gtest.h"
namespace onnxruntime {
namespace test {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
static void RunLayerNormCpuTest(const std::vector<int64_t>& shape) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnCpu.dll";
#else
provider_options["backend_path"] = "libQnnCpu.so";
#endif
auto BuildLayerNormTestCase = [](const std::vector<int64_t>& shape) -> GetTestModelFn {
return [shape](ModelTestBuilder& builder) {
// Random input data
auto input = builder.MakeInput<float>(shape, 0.0f, 10.0f);
auto scale = builder.MakeInput<float>(shape, 0.0f, 10.0f);
auto* output = builder.MakeOutput();
Node& layer_norm_node = builder.AddNode("LayerNormalization", {input, scale}, {output});
layer_norm_node.AddAttribute("axis", static_cast<int64_t>(0));
};
};
constexpr int expected_nodes_in_partition = 1;
RunQnnModelTest(BuildLayerNormTestCase(shape),
provider_options,
13,
ExpectedEPNodeAssignment::All,
expected_nodes_in_partition);
}
TEST_F(QnnCPUBackendTests, TestLayerNorm) {
RunLayerNormCpuTest({2, 3});
}
TEST_F(QnnCPUBackendTests, TestLayerNorm1D) {
RunLayerNormCpuTest({1, 2, 3});
}
TEST_F(QnnCPUBackendTests, TestLayerNorm2D) {
RunLayerNormCpuTest({1, 2, 3, 3});
}
TEST_F(QnnCPUBackendTests, TestLayerNorm3D) {
RunLayerNormCpuTest({1, 2, 3, 3, 4});
}
template <typename InputQType, typename ScaleQType>
GetQDQTestCaseFn BuildQDQLayerNormTestCase(const std::vector<int64_t>& input_shape,
const std::vector<int64_t>& scale_shape,
int64_t axis_value = 0) {
return [input_shape, scale_shape, axis_value](ModelTestBuilder& builder) {
const InputQType quant_zero_point = 0;
// const float quant_scale = 1.0f;
auto* input = builder.MakeInput<InputQType>(input_shape, std::numeric_limits<InputQType>::min(),
std::numeric_limits<InputQType>::max());
auto* dq_input = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<InputQType>(input, 0.0039f, quant_zero_point, dq_input);
auto* dq_scale_output = builder.MakeIntermediate();
auto* scale = builder.MakeInitializer<ScaleQType>(scale_shape, static_cast<ScaleQType>(1), static_cast<ScaleQType>(127));
builder.AddDequantizeLinearNode<ScaleQType>(scale, 0.0028f, quant_zero_point, dq_scale_output);
auto* layernorm_output = builder.MakeIntermediate();
Node& layer_norm_node = builder.AddNode("LayerNormalization", {dq_input, dq_scale_output}, {layernorm_output});
layer_norm_node.AddAttribute("axis", axis_value);
auto* q_output = builder.MakeIntermediate();
builder.AddQuantizeLinearNode<InputQType>(layernorm_output, 0.00377f, quant_zero_point, q_output);
auto* final_output = builder.MakeOutput();
builder.AddDequantizeLinearNode<InputQType>(q_output, 0.00377f,
quant_zero_point,
final_output);
};
}
/**
* Runs an LayerNormalization model on the QNN HTP backend. Checks the graph node assignment, and that inference
* outputs for QNN and CPU match.
*
* \param input_shape The input's shape.
* \param scale_shape The scale's shape.
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
* \param num_modes_in_graph The number of expected nodes in the graph.
* \param axis_value The axis value.
*/
static void RunLayerNormQDQTest(const std::vector<int64_t>& input_shape,
const std::vector<int64_t>& scale_shape,
ExpectedEPNodeAssignment expected_ep_assignment,
int64_t axis_value = 0) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif
// Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
RunQnnModelTest(BuildQDQLayerNormTestCase<uint8_t, uint8_t>(input_shape, scale_shape, axis_value),
provider_options,
11,
expected_ep_assignment);
}
// Check that QNN compiles DQ -> LayerNormalization -> Q as a single unit.
// Use an input of rank 3.
// Failed QNN op validation: QnnDsp <E> Param[0] has incorrect Value 3
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis0) {
RunLayerNormQDQTest({1, 2, 3}, {1, 2, 3}, ExpectedEPNodeAssignment::All);
}
// Failed QNN FinalizeGraphs: QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) {
RunLayerNormQDQTest({1, 2, 3}, {3}, ExpectedEPNodeAssignment::All, -1);
}
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test
} // namespace onnxruntime
#endif

View file

@ -154,6 +154,27 @@ TEST_F(QnnHTPBackendTests, TestQDQAtanTest) {
"Atan", {}, 11, ExpectedEPNodeAssignment::All);
}
// Check that QNN compiles DQ -> Asin -> Q as a single unit.
// Use an input of rank 3.
TEST_F(QnnHTPBackendTests, TestQDQAsinTest) {
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, 0, 1), // input range 0 ~ 1
"Asin", {}, 11, ExpectedEPNodeAssignment::All);
}
// Check that QNN compiles DQ -> Sign -> Q as a single unit.
// Use an input of rank 3.
TEST_F(QnnHTPBackendTests, TestQDQSignTest) {
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()),
"Sign", {}, 11, ExpectedEPNodeAssignment::All);
}
// Check that QNN compiles DQ -> Sign -> Q as a single unit.
// Use an input of rank 3.
TEST_F(QnnHTPBackendTests, TestQDQSinTest) {
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()),
"Sin", {}, 11, ExpectedEPNodeAssignment::All);
}
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
// Test that the default axis (-1) for SoftMax opset 13 works.
TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_DefaultAxis) {