[QNN] ReduceL2 Support (#22636)

Add ReduceL2 support to QNN EP. Some of the QNN AI Hub models contain
ReduceL2, such as openai_clip_CLIPTextEncoder and
openai_clip_CLIPImageEncoder. Without this PR, the ReduceL2 node will be
assigned to CPU and the graph will be split into 2 QNN graphs; with this
PR, all nodes will be in QNN EP.
This commit is contained in:
Vincent Wang 2024-11-28 10:09:13 +08:00 committed by GitHub
parent 08abab0b14
commit 42ecb05080
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 103 additions and 33 deletions

View file

@ -83,6 +83,7 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
CreateReduceOpBuilder("ReduceMin", *this);
CreateReduceOpBuilder("ReduceProd", *this);
CreateReduceOpBuilder("ReduceSum", *this);
CreateReduceOpBuilder("ReduceL2", *this);
}
{

View file

@ -6,15 +6,15 @@
#include <array>
#include <vector>
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
#include "core/framework/endian_utils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/common/safeint.h"
#include "onnx/defs/data_type_utils.h"
#include "base_op_builder.h"
#include "core/providers/common.h"
#include "core/framework/endian_utils.h"
#include "core/providers/shared/utils/utils.h"
#include "core/providers/qnn/builder/opbuilder/base_op_builder.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/qnn_utils.h"
namespace onnxruntime {
namespace qnn {
@ -25,6 +25,7 @@ enum ReduceOpType {
REDUCE_OP_TYPE_MEAN,
REDUCE_OP_TYPE_PROD,
REDUCE_OP_TYPE_SUM,
REDUCE_OP_TYPE_L2,
REDUCE_OP_TYPE_COUNT,
REDUCE_OP_TYPE_UNKNOWN,
@ -41,6 +42,8 @@ ReduceOpType GetReduceOpType(const std::string& op_type) {
return REDUCE_OP_TYPE_PROD;
} else if (op_type == "ReduceSum") {
return REDUCE_OP_TYPE_SUM;
} else if (op_type == "ReduceL2") {
return REDUCE_OP_TYPE_L2;
} else {
return REDUCE_OP_TYPE_UNKNOWN;
}
@ -51,21 +54,16 @@ class ReduceOpBuilder : public BaseOpBuilder {
ReduceOpBuilder() : BaseOpBuilder("ReduceOpBuilder") {}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ReduceOpBuilder);
Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
protected:
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation = false) const override ORT_MUST_USE_RESULT;
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
std::vector<std::string>&& input_names, const logging::Logger& logger,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
private:
@ -84,7 +82,8 @@ const std::array<int, REDUCE_OP_TYPE_COUNT> ReduceOpBuilder::opset_with_axes_as_
18, // ReduceMin
18, // ReduceMean
18, // ReduceProd
13 // ReduceSum
13, // ReduceSum
18, // ReduceL2
};
Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
@ -175,8 +174,7 @@ Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const Nod
return Status::OK();
}
Status ReduceOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
Status ReduceOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
const logging::Logger& logger) const {
ReduceOpType reduce_op_type = GetReduceOpType(node_unit.OpType());
if (reduce_op_type == ReduceOpType::REDUCE_OP_TYPE_UNKNOWN) {
@ -188,13 +186,17 @@ Status ReduceOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: ReduceProd operator not supported by HTP backend.");
}
// ReduceL2 is composed of Mul->ReduceSum->Sqrt. It's not easy to set the quantization parameters for the
// intermediate activation tensors, so we don't support ReduceL2 with quantized input for now.
if (reduce_op_type == ReduceOpType::REDUCE_OP_TYPE_L2 && node_unit.Inputs()[0].quant_param.has_value()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: ReduceL2 operator does not support quantized input for now.");
}
return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
}
Status ReduceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
Status ReduceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
const logging::Logger& logger, std::vector<std::string>& input_names,
bool do_op_validation) const {
ORT_UNUSED_PARAMETER(do_op_validation);
@ -207,11 +209,9 @@ Status ReduceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
return Status::OK();
}
Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const {
const logging::Logger& logger, bool do_op_validation) const {
NodeAttrHelper node_attr_helper(node_unit);
std::vector<std::string> param_tensor_names;
@ -229,8 +229,8 @@ Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
std::transform(axes_set.begin(), axes_set.end(), axes_data.begin(),
[](AxesOnnxIntType item) { return SafeInt<AxesQnnIntType>(item); });
QnnParamWrapper axes_param(node_unit.Index(), node_unit.Name(), QNN_OP_REDUCE_MAX_PARAM_AXES,
std::move(axes_shape), std::move(axes_data));
QnnParamWrapper axes_param(node_unit.Index(), node_unit.Name(), QNN_OP_REDUCE_MAX_PARAM_AXES, std::move(axes_shape),
std::move(axes_data));
param_tensor_names.push_back(axes_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(axes_param));
@ -245,10 +245,57 @@ Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
param_tensor_names.push_back(keep_dims_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(keep_dims_param));
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, do_op_validation, GetQnnOpType(node_unit.OpType())));
if (node_unit.OpType() == "ReduceL2") {
// QNN doesn't have a single op for ReduceL2, so we need to add an
// ElementWiseMultiply->ReduceSum->ElementWiseSquareRoot node sequence.
const auto& input = node_unit.Inputs()[0];
const auto& output = node_unit.Outputs()[0];
std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input.node_arg, input_shape), "Cannot get input shape.");
std::vector<uint32_t> output_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output.node_arg, output_shape), "Cannot get output shape.");
ORT_ENFORCE(!input.quant_param.has_value(), "Input tensor must not be quantized.");
const auto* type_proto = output.node_arg.TypeAsProto();
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(false, type_proto, qnn_data_type));
const std::string input_name = input_names[0];
// Step 1: y_pow2 = x * x, using ElementWiseMultiply instead of ElementWisePower so we don't need to add a new
// initializer tensor for the power value. The performance difference is negligible.
const std::string pow2_name = input_name + "_ort_qnn_ep_pow2";
QnnTensorWrapper pow2_tensorwrapper(pow2_name, QNN_TENSOR_TYPE_NATIVE, qnn_data_type, QnnQuantParamsWrapper(),
std::move(input_shape));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(pow2_tensorwrapper)), "AddTensorWrapper failed");
ORT_RETURN_IF_NOT(
qnn_model_wrapper.CreateQnnNode(pow2_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_ELEMENT_WISE_MULTIPLY,
{input_name, input_name}, {pow2_name}, {}, do_op_validation),
"CreateQnnNode failed");
// Step 2: y_pow2_sum = ReduceSum(y_pow2)
const std::string reduce_name = input_name + "_ort_qnn_ep_pow2_sum";
QnnTensorWrapper reduce_tensorwrapper(reduce_name, QNN_TENSOR_TYPE_NATIVE, qnn_data_type, QnnQuantParamsWrapper(),
std::vector<uint32_t>(output_shape));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(reduce_tensorwrapper)), "AddTensorWrapper failed");
ORT_RETURN_IF_NOT(
qnn_model_wrapper.CreateQnnNode(utils::GetNodeName(node_unit), QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_REDUCE_SUM,
{pow2_name}, {reduce_name}, std::move(param_tensor_names), do_op_validation),
"CreateQnnNode failed");
// Step 3: y = Sqrt(y_pow2_sum)
Qnn_TensorType_t output_tensor_type =
qnn_model_wrapper.IsGraphOutput(output.node_arg.Name()) ? QNN_TENSOR_TYPE_APP_READ : QNN_TENSOR_TYPE_NATIVE;
QnnTensorWrapper sqrt_tensorwrapper(output.node_arg.Name(), output_tensor_type, qnn_data_type,
QnnQuantParamsWrapper(), std::move(output_shape));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(sqrt_tensorwrapper)), "AddTensorWrapper failed");
ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(input_name + "_ort_qnn_ep_pow2_sum_sqrt",
QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_ELEMENT_WISE_SQUARE_ROOT,
{reduce_name}, {output.node_arg.Name()}, {}, do_op_validation),
"CreateQnnNode failed");
} else {
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names),
std::move(param_tensor_names), logger, do_op_validation,
GetQnnOpType(node_unit.OpType())));
}
return Status::OK();
}

View file

@ -388,6 +388,7 @@ bool ReduceOpHasAxesInput(const std::string& op_type, int opset_version) {
{"ReduceMean", 18},
{"ReduceProd", 18},
{"ReduceSum", 13},
{"ReduceL2", 18},
};
const auto it = opset_with_axes_as_input.find(op_type);

View file

@ -309,6 +309,27 @@ TEST_F(QnnCPUBackendTests, ReduceMeanOpset13) {
ExpectedEPNodeAssignment::All);
}
//
// ReduceL2
//
// Runs a float ReduceL2 through RunReduceTest with opset 18 and expects
// ExpectedEPNodeAssignment::All, i.e. no node falling back to another EP.
// NOTE(review): the fixture name suggests this targets the QNN CPU backend — confirm against the fixture.
TEST_F(QnnCPUBackendTests, ReduceL2Opset18) {
RunReduceTest<float>("ReduceL2",
// NOTE(review): presumably a {2, 2}-shaped, non-initializer input with values in [-10, 10] —
// confirm against the TestInputDef constructor.
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
// NOTE(review): presumably the axes to reduce over (both dimensions) — confirm against RunReduceTest.
std::vector<int64_t>{0, 1},
true, // keepdims
// Opset version, per the test name.
18,
ExpectedEPNodeAssignment::All);
}
// Same ReduceL2 check as the opset 18 variant, but run with opset 13; expects
// ExpectedEPNodeAssignment::All, i.e. no node falling back to another EP.
// NOTE(review): opset 13 presumably exercises the older form where axes is an attribute rather than an
// input — confirm against RunReduceTest.
TEST_F(QnnCPUBackendTests, ReduceL2Opset13) {
RunReduceTest<float>("ReduceL2",
// NOTE(review): presumably a {2, 2}-shaped, non-initializer input with values in [-10, 10] —
// confirm against the TestInputDef constructor.
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
// NOTE(review): presumably the axes to reduce over (both dimensions) — confirm against RunReduceTest.
std::vector<int64_t>{0, 1},
true, // keepdims
// Opset version, per the test name.
13,
ExpectedEPNodeAssignment::All);
}
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
// Test creates a graph with a ReduceSum node, and checks that all nodes are supported by the QNN EP