mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
[QNN EP] Op support: LayerNorm, Asin, Sign (#16740)
### Description Add op support for LayerNorm, Asin, Sign. Enable QDQ node unit support for Sin Op --------- Co-authored-by: Adrian Lizarraga <adlizarraga@microsoft.com>
This commit is contained in:
parent
b508c7236f
commit
4d569f6586
11 changed files with 382 additions and 86 deletions
|
|
@ -330,23 +330,29 @@ bool WhereNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node&
|
|||
dt_input_1 == dt_output;
|
||||
}
|
||||
|
||||
bool InstanceNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
|
||||
const Node& node,
|
||||
const std::vector<const Node*>& dq_nodes,
|
||||
const std::vector<const Node*>& q_nodes) const {
|
||||
bool InstanceAndLayerNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
|
||||
const Node& node,
|
||||
const std::vector<const Node*>& dq_nodes,
|
||||
const std::vector<const Node*>& q_nodes) const {
|
||||
if (!CheckQDQNodes(graph_viewer, node, dq_nodes, q_nodes)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
int32_t dt_scale = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
int32_t dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
int32_t dt_bias = 0;
|
||||
bool has_bias = false;
|
||||
// bias is optional for LayerNorm
|
||||
if (dq_nodes.size() > 2) {
|
||||
has_bias = true;
|
||||
dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
}
|
||||
int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
|
||||
// Input, output, and scale need to be the same type. The bias is int32.
|
||||
return (dt_input == dt_output) &&
|
||||
(dt_input == dt_scale) &&
|
||||
(dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32);
|
||||
(has_bias ? dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32 : true);
|
||||
}
|
||||
|
||||
bool BatchNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ class GemmNodeGroupSelector : public NodeGroupSelector {
|
|||
|
||||
// Input: DQ nodes for input, scale, and B
|
||||
// Output: Q node for output
|
||||
class InstanceNormalizationNodeGroupSelector : public NodeGroupSelector {
|
||||
class InstanceAndLayerNormalizationNodeGroupSelector : public NodeGroupSelector {
|
||||
private:
|
||||
bool Check(const GraphViewer& graph_viewer, const Node& node,
|
||||
const std::vector<const Node*>& dq_nodes,
|
||||
|
|
@ -264,7 +264,7 @@ class GemmSelector : public BaseSelector {
|
|||
class InstanceNormalizationSelector : public BaseSelector {
|
||||
public:
|
||||
InstanceNormalizationSelector()
|
||||
: BaseSelector(std::make_unique<InstanceNormalizationNodeGroupSelector>()) {}
|
||||
: BaseSelector(std::make_unique<InstanceAndLayerNormalizationNodeGroupSelector>()) {}
|
||||
};
|
||||
|
||||
// DQ nodes for X, W and optionally B, (mean, var not required) -> node -> Q
|
||||
|
|
|
|||
|
|
@ -62,6 +62,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
|
|||
{"Softmax", {}},
|
||||
{"Sqrt", {}},
|
||||
{"Atan", {}},
|
||||
{"Asin", {}},
|
||||
{"Sin", {}},
|
||||
{"Sign", {}},
|
||||
{"Tanh", {}},
|
||||
{"Exp", {}},
|
||||
{"LRN", {}}};
|
||||
|
|
@ -88,8 +91,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetMatMulOpVersionsMap() {
|
|||
static const OpVersionsAndSelector::OpVersionsMap GetGemmOpVersionsMap() {
|
||||
return {{"Gemm", {}}};
|
||||
}
|
||||
static const OpVersionsAndSelector::OpVersionsMap GetInstanceNormalizationOpVersionsMap() {
|
||||
return {{"InstanceNormalization", {}}};
|
||||
static const OpVersionsAndSelector::OpVersionsMap GetInstanceAndLayerNormalizationOpVersionsMap() {
|
||||
return {{"InstanceNormalization", {}},
|
||||
{"LayerNormalization", {}}};
|
||||
}
|
||||
static const OpVersionsAndSelector::OpVersionsMap GetBatchNormalizationOpVersionsMap() {
|
||||
return {{"BatchNormalization", {}}};
|
||||
|
|
@ -167,10 +171,10 @@ void RegisterGemmSelector(Selectors& qdq_selectors) {
|
|||
std::move(selector));
|
||||
}
|
||||
|
||||
void RegisterInstanceNormalizationSelector(Selectors& qdq_selectors) {
|
||||
void RegisterInstanceAndLayerNormalizationSelector(Selectors& qdq_selectors) {
|
||||
/* register selector for InstanceNormalization op */
|
||||
std::unique_ptr<NodeGroupSelector> selector = std::make_unique<InstanceNormalizationNodeGroupSelector>();
|
||||
qdq_selectors.RegisterSelector(GetInstanceNormalizationOpVersionsMap(),
|
||||
std::unique_ptr<NodeGroupSelector> selector = std::make_unique<InstanceAndLayerNormalizationNodeGroupSelector>();
|
||||
qdq_selectors.RegisterSelector(GetInstanceAndLayerNormalizationOpVersionsMap(),
|
||||
std::move(selector));
|
||||
}
|
||||
|
||||
|
|
@ -198,7 +202,7 @@ void SelectorManager::CreateSelectors() {
|
|||
RegisterConvTransposeSelector(qdq_selectors_);
|
||||
RegisterMatMulSelector(qdq_selectors_);
|
||||
RegisterGemmSelector(qdq_selectors_);
|
||||
RegisterInstanceNormalizationSelector(qdq_selectors_);
|
||||
RegisterInstanceAndLayerNormalizationSelector(qdq_selectors_);
|
||||
RegisterBatchNormalizationSelector(qdq_selectors_);
|
||||
RegisterLogicalComparisonSelectors(qdq_selectors_);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,12 +15,14 @@ namespace qnn {
|
|||
OpBuilderRegistrations::OpBuilderRegistrations() {
|
||||
{
|
||||
CreateSimpleOpBuilder("Add", *this);
|
||||
CreateSimpleOpBuilder("Asin", *this);
|
||||
CreateSimpleOpBuilder("Atan", *this);
|
||||
CreateSimpleOpBuilder("Mul", *this);
|
||||
CreateSimpleOpBuilder("Abs", *this);
|
||||
CreateSimpleOpBuilder("And", *this);
|
||||
CreateSimpleOpBuilder("Ceil", *this);
|
||||
CreateSimpleOpBuilder("Cos", *this);
|
||||
CreateSimpleOpBuilder("Sign", *this);
|
||||
CreateSimpleOpBuilder("Div", *this);
|
||||
CreateSimpleOpBuilder("Equal", *this);
|
||||
CreateSimpleOpBuilder("Exp", *this);
|
||||
|
|
@ -136,6 +138,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
|
|||
CreateBatchNormOpBuilder("BatchNormalization", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateLayerNormOpBuilder("LayerNormalization", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateLRNOpBuilder("LRN", *this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ void CreateReduceOpBuilder(const std::string& op_type, OpBuilderRegistrations& o
|
|||
|
||||
void CreateBatchNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateLayerNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateLRNOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
} // namespace qnn
|
||||
|
|
|
|||
|
|
@ -81,86 +81,89 @@ class BaseOpBuilder : public IOpBuilder {
|
|||
static const std::string& GetQnnOpType(const std::string& onnx_op_type) {
|
||||
// TODO: Use QNN operator names defined in "QnnOpDef.h"
|
||||
static const std::unordered_map<std::string, std::string> onnx_op_type_to_qnn_op_type = {
|
||||
{"Add", "ElementWiseAdd"},
|
||||
{"Mul", "ElementWiseMultiply"},
|
||||
{"Abs", "ElementWiseAbs"},
|
||||
{"And", "ElementWiseAnd"},
|
||||
{"Atan", "ElementWiseAtan"},
|
||||
{"Ceil", "ElementWiseCeil"},
|
||||
{"Cast", "Cast"},
|
||||
{"Clip", "ReluMinMax"},
|
||||
{"Cos", "ElementWiseCos"},
|
||||
{"Div", "ElementWiseDivide"},
|
||||
{"Equal", "ElementWiseEqual"},
|
||||
{"Exp", "ElementWiseExp"},
|
||||
{"Floor", "ElementWiseFloor"},
|
||||
{"Gather", "Gather"},
|
||||
{"Greater", "ElementWiseGreater"},
|
||||
{"GreaterOrEqual", "ElementWiseGreaterEqual"},
|
||||
{"Less", "ElementWiseLess"},
|
||||
{"LessOrEqual", "ElementWiseLessEqual"},
|
||||
{"Log", "ElementWiseLog"},
|
||||
{"Max", "ElementWiseMaximum"},
|
||||
{"Min", "ElementWiseMinimum"},
|
||||
{"Neg", "ElementWiseNeg"},
|
||||
{"Not", "ElementWiseNot"},
|
||||
{"Or", "ElementWiseOr"},
|
||||
{"Pow", "ElementWisePower"},
|
||||
{"PRelu", "Prelu"},
|
||||
{"LeakyRelu", "Prelu"},
|
||||
{"ReduceMax", "ReduceMax"},
|
||||
{"ReduceMean", "ReduceMean"},
|
||||
{"ReduceMin", "ReduceMin"},
|
||||
{"ReduceProd", "ReduceProd"},
|
||||
{"ReduceSum", "ReduceSum"},
|
||||
{"Round", "ElementWiseRound"},
|
||||
{"Where", "ElementWiseSelect"},
|
||||
{"Sigmoid", "Sigmoid"},
|
||||
{"Sin", "ElementWiseSin"},
|
||||
{"Slice", "StridedSlice"},
|
||||
{"Split", "Split"},
|
||||
{"Softmax", "Softmax"},
|
||||
{"Sqrt", "ElementWiseSquareRoot"},
|
||||
{"Sub", "ElementWiseSubtract"},
|
||||
{"Tanh", "Tanh"},
|
||||
{"Transpose", "Transpose"},
|
||||
{"Add", QNN_OP_ELEMENT_WISE_ADD},
|
||||
{"Mul", QNN_OP_ELEMENT_WISE_MULTIPLY},
|
||||
{"Abs", QNN_OP_ELEMENT_WISE_ABS},
|
||||
{"And", QNN_OP_ELEMENT_WISE_AND},
|
||||
{"Asin", QNN_OP_ELEMENT_WISE_ASIN},
|
||||
{"Atan", QNN_OP_ELEMENT_WISE_ATAN},
|
||||
{"Ceil", QNN_OP_ELEMENT_WISE_CEIL},
|
||||
{"Sign", QNN_OP_ELEMENT_WISE_SIGN},
|
||||
{"Cast", QNN_OP_CAST},
|
||||
{"Clip", QNN_OP_RELU_MIN_MAX},
|
||||
{"Cos", QNN_OP_ELEMENT_WISE_COS},
|
||||
{"Div", QNN_OP_ELEMENT_WISE_DIVIDE},
|
||||
{"Equal", QNN_OP_ELEMENT_WISE_EQUAL},
|
||||
{"Exp", QNN_OP_ELEMENT_WISE_EXP},
|
||||
{"Floor", QNN_OP_ELEMENT_WISE_FLOOR},
|
||||
{"Gather", QNN_OP_GATHER},
|
||||
{"Greater", QNN_OP_ELEMENT_WISE_GREATER},
|
||||
{"GreaterOrEqual", QNN_OP_ELEMENT_WISE_GREATER_EQUAL},
|
||||
{"Less", QNN_OP_ELEMENT_WISE_LESS},
|
||||
{"LessOrEqual", QNN_OP_ELEMENT_WISE_LESS_EQUAL},
|
||||
{"Log", QNN_OP_ELEMENT_WISE_LOG},
|
||||
{"Max", QNN_OP_ELEMENT_WISE_MAXIMUM},
|
||||
{"Min", QNN_OP_ELEMENT_WISE_MINIMUM},
|
||||
{"Neg", QNN_OP_ELEMENT_WISE_NEG},
|
||||
{"Not", QNN_OP_ELEMENT_WISE_NOT},
|
||||
{"Or", QNN_OP_ELEMENT_WISE_OR},
|
||||
{"Pow", QNN_OP_ELEMENT_WISE_POWER},
|
||||
{"PRelu", QNN_OP_PRELU},
|
||||
{"LeakyRelu", QNN_OP_PRELU},
|
||||
{"ReduceMax", QNN_OP_REDUCE_MAX},
|
||||
{"ReduceMean", QNN_OP_REDUCE_MEAN},
|
||||
{"ReduceMin", QNN_OP_REDUCE_MIN},
|
||||
{"ReduceProd", QNN_OP_REDUCE_PROD},
|
||||
{"ReduceSum", QNN_OP_REDUCE_SUM},
|
||||
{"Round", QNN_OP_ELEMENT_WISE_ROUND},
|
||||
{"Where", QNN_OP_ELEMENT_WISE_SELECT},
|
||||
{"Sigmoid", QNN_OP_SIGMOID},
|
||||
{"Sin", QNN_OP_ELEMENT_WISE_SIN},
|
||||
{"Slice", QNN_OP_STRIDED_SLICE},
|
||||
{"Split", QNN_OP_SPLIT},
|
||||
{"Softmax", QNN_OP_SOFTMAX},
|
||||
{"Sqrt", QNN_OP_ELEMENT_WISE_SQUARE_ROOT},
|
||||
{"Sub", QNN_OP_ELEMENT_WISE_SUBTRACT},
|
||||
{"Tanh", QNN_OP_TANH},
|
||||
{"Transpose", QNN_OP_TRANSPOSE},
|
||||
|
||||
{"DequantizeLinear", "Dequantize"},
|
||||
{"QuantizeLinear", "Quantize"},
|
||||
{"DequantizeLinear", QNN_OP_DEQUANTIZE},
|
||||
{"QuantizeLinear", QNN_OP_QUANTIZE},
|
||||
|
||||
{"MatMul", "MatMul"},
|
||||
{"MatMul", QNN_OP_MAT_MUL},
|
||||
|
||||
{"Elu", "Elu"},
|
||||
{"Relu", "Relu"},
|
||||
{"Gelu", "Gelu"},
|
||||
{"Sigmoid", "Sigmoid"},
|
||||
{"Elu", QNN_OP_ELU},
|
||||
{"Relu", QNN_OP_RELU},
|
||||
{"Gelu", QNN_OP_GELU},
|
||||
{"Sigmoid", QNN_OP_SIGMOID},
|
||||
|
||||
{"HardSwish", "HardSwish"},
|
||||
{"HardSwish", QNN_OP_HARD_SWISH},
|
||||
|
||||
{"Conv", "Conv2d"},
|
||||
{"ConvTranspose", "TransposeConv2d"},
|
||||
{"Conv", QNN_OP_CONV_2D},
|
||||
{"ConvTranspose", QNN_OP_TRANSPOSE_CONV_2D},
|
||||
|
||||
{"GlobalAveragePool", "PoolAvg2d"},
|
||||
{"AveragePool", "PoolAvg2d"},
|
||||
{"MaxPool", "PoolMax2d"},
|
||||
{"GlobalAveragePool", QNN_OP_POOL_AVG_2D},
|
||||
{"AveragePool", QNN_OP_POOL_AVG_2D},
|
||||
{"MaxPool", QNN_OP_POOL_MAX_2D},
|
||||
|
||||
{"Reshape", "Reshape"},
|
||||
{"Resize", "Resize"},
|
||||
{"Flatten", "Reshape"},
|
||||
{"Squeeze", "Reshape"},
|
||||
{"Unsqueeze", "Reshape"},
|
||||
{"Reshape", QNN_OP_RESHAPE},
|
||||
{"Resize", QNN_OP_RESIZE},
|
||||
{"Flatten", QNN_OP_RESHAPE},
|
||||
{"Squeeze", QNN_OP_RESHAPE},
|
||||
{"Unsqueeze", QNN_OP_RESHAPE},
|
||||
|
||||
{"LogSoftmax", "LogSoftmax"},
|
||||
{"Concat", "Concat"},
|
||||
{"LogSoftmax", QNN_OP_LOG_SOFTMAX},
|
||||
{"Concat", QNN_OP_CONCAT},
|
||||
|
||||
{"Gemm", "FullyConnected"},
|
||||
{"Gemm", QNN_OP_FULLY_CONNECTED},
|
||||
|
||||
{"ArgMax", "Argmax"},
|
||||
{"ArgMin", "Argmin"},
|
||||
{"Tile", "Tile"},
|
||||
{"TopK", "TopK"},
|
||||
{"InstanceNormalization", "InstanceNorm"},
|
||||
{"BatchNormalization", "Batchnorm"},
|
||||
{"ArgMax", QNN_OP_ARGMAX},
|
||||
{"ArgMin", QNN_OP_ARGMIN},
|
||||
{"Tile", QNN_OP_TILE},
|
||||
{"TopK", QNN_OP_TOP_K},
|
||||
{"InstanceNormalization", QNN_OP_INSTANCE_NORM},
|
||||
{"BatchNormalization", QNN_OP_BATCHNORM},
|
||||
{"LayerNormalization", QNN_OP_LAYER_NORM},
|
||||
|
||||
{"LRN", QNN_OP_LRN}};
|
||||
auto it = onnx_op_type_to_qnn_op_type.find(onnx_op_type);
|
||||
|
|
@ -262,7 +265,8 @@ class BaseOpBuilder : public IOpBuilder {
|
|||
static const std::unordered_map<std::string, std::pair<size_t, size_t>> input_output_count_qnn_required = {
|
||||
{"GlobalAveragePool", {0, 1}},
|
||||
{"MaxPool", {0, 1}},
|
||||
{"BatchNormalization", {3, 1}}};
|
||||
{"BatchNormalization", {3, 1}},
|
||||
{"LayerNormalization", {0, 1}}};
|
||||
|
||||
auto pos = input_output_count_qnn_required.find(onnx_op_type);
|
||||
if (pos == input_output_count_qnn_required.end()) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,112 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
|
||||
#include "core/providers/qnn/builder/op_builder_factory.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "onnx/defs/data_type_utils.h"
|
||||
|
||||
#include "base_op_builder.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace qnn {
|
||||
|
||||
class LayerNormOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
LayerNormOpBuilder() : BaseOpBuilder("LayerNormOpBuilder") {}
|
||||
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(LayerNormOpBuilder);
|
||||
|
||||
Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model) const override final ORT_MUST_USE_RESULT;
|
||||
|
||||
protected:
|
||||
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
std::vector<std::string>&& input_names,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model,
|
||||
bool do_op_validation) const override ORT_MUST_USE_RESULT;
|
||||
};
|
||||
|
||||
// Instance normalization op is sensitive to data layout.
|
||||
// The nodes from 1st call of GetCapability do not get layout transformer applied, so their shapes are still NCHW.
|
||||
// The nodes from 2nd call of GetCapability get their layout transformed to NHWC.
|
||||
// Therefore, we need to check the node domain to determine if the layout has been transformed.
|
||||
Status LayerNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model) const {
|
||||
const auto float_elem_type = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float");
|
||||
|
||||
// Check input type is float for CPU.
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
ONNX_NAMESPACE::DataType input_data_type = inputs[0].node_arg.Type();
|
||||
ORT_RETURN_IF(!is_quantized_model && input_data_type != float_elem_type, "QNN LayerNorm data type ", input_data_type->c_str(), " is not supported in CPU backend.");
|
||||
|
||||
// Also check output type is float for CPU.
|
||||
const auto& outputs = node_unit.Outputs();
|
||||
ONNX_NAMESPACE::DataType output_data_type = outputs[0].node_arg.Type();
|
||||
ORT_RETURN_IF(!is_quantized_model && output_data_type != float_elem_type, "QNN LayerNorm data type ", output_data_type->c_str(), " is not supported in CPU backend.");
|
||||
ORT_RETURN_IF(outputs.size() > 1, "QNN LayerNorm only support 1 output.");
|
||||
|
||||
return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, is_quantized_model, true);
|
||||
}
|
||||
|
||||
Status LayerNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
std::vector<std::string>&& input_names,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model,
|
||||
bool do_op_validation) const {
|
||||
NodeAttrHelper node_helper(node_unit);
|
||||
std::vector<std::string> param_tensor_names;
|
||||
|
||||
const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec.
|
||||
Qnn_Scalar_t epsilon_param = QNN_SCALAR_INIT;
|
||||
epsilon_param.dataType = QNN_DATATYPE_FLOAT_32;
|
||||
epsilon_param.floatValue = epsilon;
|
||||
QnnParamWrapper epsilon_param_wrapper(node_unit.Index(),
|
||||
node_unit.Name(),
|
||||
QNN_OP_LAYER_NORM_PARAM_EPSILON,
|
||||
epsilon_param);
|
||||
param_tensor_names.push_back(epsilon_param_wrapper.GetParamTensorName());
|
||||
qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));
|
||||
|
||||
std::vector<uint32_t> input_shape;
|
||||
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape), "Cannot get shape of input 0");
|
||||
const size_t input_rank = input_shape.size();
|
||||
int32_t default_axis = -1;
|
||||
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
|
||||
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
|
||||
size_t axes_rank = input_rank - static_cast<size_t>(default_axis);
|
||||
std::vector<uint32_t> axes(axes_rank, 0);
|
||||
std::vector<uint32_t> axes_shape{SafeInt<uint32_t>(axes_rank)};
|
||||
axes[0] = static_cast<uint32_t>(default_axis);
|
||||
for (size_t i = 1; i < axes.size(); ++i) {
|
||||
axes[i] = axes[i - 1] + 1;
|
||||
}
|
||||
|
||||
QnnParamWrapper axes_param(node_unit.Index(), node_unit.Name(), QNN_OP_LAYER_NORM_PARAM_AXES,
|
||||
std::move(axes_shape), std::move(axes));
|
||||
param_tensor_names.push_back(axes_param.GetParamTensorName());
|
||||
qnn_model_wrapper.AddParamWrapper(std::move(axes_param));
|
||||
|
||||
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
|
||||
std::move(input_names),
|
||||
std::move(param_tensor_names),
|
||||
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void CreateLayerNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
|
||||
op_registrations.AddOpBuilder(op_type, std::make_unique<LayerNormOpBuilder>());
|
||||
}
|
||||
|
||||
} // namespace qnn
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -439,6 +439,7 @@ typedef struct GraphConfigInfo {
|
|||
|
||||
namespace qnn_def {
|
||||
const std::string package_name = "qti.aisw";
|
||||
// TODO: remove these parameter name, re-use from QnnOpDef.h
|
||||
const std::string dilation = "dilation";
|
||||
const std::string pad_amount = "pad_amount";
|
||||
const std::string stride = "stride";
|
||||
|
|
|
|||
|
|
@ -140,7 +140,8 @@ TEST(MathOpTest, Sign_int64) {
|
|||
std::vector<int64_t> output;
|
||||
TestImpl<int64_t>(input.cbegin(), input.cend(), std::back_inserter(output));
|
||||
test.AddOutput<int64_t>("output", input_dims, output);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
|
||||
// TODO: QNN execute error, need further investigation
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kQnnExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(MathOpTest, Sign_float) {
|
||||
|
|
|
|||
139
onnxruntime/test/providers/qnn/layer_norm_test.cc
Normal file
139
onnxruntime/test/providers/qnn/layer_norm_test.cc
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
|
||||
#include <string>
|
||||
#include "core/graph/graph.h"
|
||||
|
||||
#include "test/optimizer/qdq_test_utils.h"
|
||||
#include "test/providers/qnn/qnn_test_utils.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
|
||||
static void RunLayerNormCpuTest(const std::vector<int64_t>& shape) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnCpu.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = "libQnnCpu.so";
|
||||
#endif
|
||||
|
||||
auto BuildLayerNormTestCase = [](const std::vector<int64_t>& shape) -> GetTestModelFn {
|
||||
return [shape](ModelTestBuilder& builder) {
|
||||
// Random input data
|
||||
auto input = builder.MakeInput<float>(shape, 0.0f, 10.0f);
|
||||
auto scale = builder.MakeInput<float>(shape, 0.0f, 10.0f);
|
||||
|
||||
auto* output = builder.MakeOutput();
|
||||
Node& layer_norm_node = builder.AddNode("LayerNormalization", {input, scale}, {output});
|
||||
|
||||
layer_norm_node.AddAttribute("axis", static_cast<int64_t>(0));
|
||||
};
|
||||
};
|
||||
|
||||
constexpr int expected_nodes_in_partition = 1;
|
||||
RunQnnModelTest(BuildLayerNormTestCase(shape),
|
||||
provider_options,
|
||||
13,
|
||||
ExpectedEPNodeAssignment::All,
|
||||
expected_nodes_in_partition);
|
||||
}
|
||||
|
||||
TEST_F(QnnCPUBackendTests, TestLayerNorm) {
|
||||
RunLayerNormCpuTest({2, 3});
|
||||
}
|
||||
|
||||
TEST_F(QnnCPUBackendTests, TestLayerNorm1D) {
|
||||
RunLayerNormCpuTest({1, 2, 3});
|
||||
}
|
||||
|
||||
TEST_F(QnnCPUBackendTests, TestLayerNorm2D) {
|
||||
RunLayerNormCpuTest({1, 2, 3, 3});
|
||||
}
|
||||
|
||||
TEST_F(QnnCPUBackendTests, TestLayerNorm3D) {
|
||||
RunLayerNormCpuTest({1, 2, 3, 3, 4});
|
||||
}
|
||||
|
||||
template <typename InputQType, typename ScaleQType>
|
||||
GetQDQTestCaseFn BuildQDQLayerNormTestCase(const std::vector<int64_t>& input_shape,
|
||||
const std::vector<int64_t>& scale_shape,
|
||||
int64_t axis_value = 0) {
|
||||
return [input_shape, scale_shape, axis_value](ModelTestBuilder& builder) {
|
||||
const InputQType quant_zero_point = 0;
|
||||
// const float quant_scale = 1.0f;
|
||||
|
||||
auto* input = builder.MakeInput<InputQType>(input_shape, std::numeric_limits<InputQType>::min(),
|
||||
std::numeric_limits<InputQType>::max());
|
||||
auto* dq_input = builder.MakeIntermediate();
|
||||
builder.AddDequantizeLinearNode<InputQType>(input, 0.0039f, quant_zero_point, dq_input);
|
||||
|
||||
auto* dq_scale_output = builder.MakeIntermediate();
|
||||
auto* scale = builder.MakeInitializer<ScaleQType>(scale_shape, static_cast<ScaleQType>(1), static_cast<ScaleQType>(127));
|
||||
builder.AddDequantizeLinearNode<ScaleQType>(scale, 0.0028f, quant_zero_point, dq_scale_output);
|
||||
|
||||
auto* layernorm_output = builder.MakeIntermediate();
|
||||
Node& layer_norm_node = builder.AddNode("LayerNormalization", {dq_input, dq_scale_output}, {layernorm_output});
|
||||
layer_norm_node.AddAttribute("axis", axis_value);
|
||||
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<InputQType>(layernorm_output, 0.00377f, quant_zero_point, q_output);
|
||||
|
||||
auto* final_output = builder.MakeOutput();
|
||||
builder.AddDequantizeLinearNode<InputQType>(q_output, 0.00377f,
|
||||
quant_zero_point,
|
||||
final_output);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs an LayerNormalization model on the QNN HTP backend. Checks the graph node assignment, and that inference
|
||||
* outputs for QNN and CPU match.
|
||||
*
|
||||
* \param input_shape The input's shape.
|
||||
* \param scale_shape The scale's shape.
|
||||
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
|
||||
* \param num_modes_in_graph The number of expected nodes in the graph.
|
||||
* \param axis_value The axis value.
|
||||
*/
|
||||
static void RunLayerNormQDQTest(const std::vector<int64_t>& input_shape,
|
||||
const std::vector<int64_t>& scale_shape,
|
||||
ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
int64_t axis_value = 0) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnHtp.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = "libQnnHtp.so";
|
||||
#endif
|
||||
|
||||
// Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
|
||||
RunQnnModelTest(BuildQDQLayerNormTestCase<uint8_t, uint8_t>(input_shape, scale_shape, axis_value),
|
||||
provider_options,
|
||||
11,
|
||||
expected_ep_assignment);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> LayerNormalization -> Q as a single unit.
|
||||
// Use an input of rank 3.
|
||||
// Failed QNN op validation: QnnDsp <E> Param[0] has incorrect Value 3
|
||||
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis0) {
|
||||
RunLayerNormQDQTest({1, 2, 3}, {1, 2, 3}, ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Failed QNN FinalizeGraphs: QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
|
||||
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) {
|
||||
RunLayerNormQDQTest({1, 2, 3}, {3}, ExpectedEPNodeAssignment::All, -1);
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
||||
#endif
|
||||
|
|
@ -154,6 +154,27 @@ TEST_F(QnnHTPBackendTests, TestQDQAtanTest) {
|
|||
"Atan", {}, 11, ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> Asin -> Q as a single unit.
|
||||
// Use an input of rank 3.
|
||||
TEST_F(QnnHTPBackendTests, TestQDQAsinTest) {
|
||||
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, 0, 1), // input range 0 ~ 1
|
||||
"Asin", {}, 11, ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> Sign -> Q as a single unit.
|
||||
// Use an input of rank 3.
|
||||
TEST_F(QnnHTPBackendTests, TestQDQSignTest) {
|
||||
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()),
|
||||
"Sign", {}, 11, ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> Sign -> Q as a single unit.
|
||||
// Use an input of rank 3.
|
||||
TEST_F(QnnHTPBackendTests, TestQDQSinTest) {
|
||||
RunQDQSingleInputOpTest(TestInputDef<uint8_t>({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()),
|
||||
"Sin", {}, 11, ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
|
||||
// Test that the default axis (-1) for SoftMax opset 13 works.
|
||||
TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_DefaultAxis) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue