From 30bb0959dc55b823bad9a7d774a8fca85c9358e6 Mon Sep 17 00:00:00 2001 From: Rachel Guo <35738743+YUNQIUGUO@users.noreply.github.com> Date: Tue, 20 Jun 2023 11:09:00 -0700 Subject: [PATCH] [NNAPI EP] Add ReduceMean Op support (#16294) ### Description As title. Special cases for ReduceMean: [UPDATE] The cases listed below as unsupported are now supported by converting them to an explicit input containing all axes for NNAPI. Behavior when `axes` is not provided or is provided as an empty vector: For ReduceMean opset version 18: - Supports the case where `axes` is provided as empty with `noop_with_empty_axes` set to true. - Supports the case where `axes` is not provided with `noop_with_empty_axes` set to true. Both are treated as an identity op. - Originally did not support the case where `axes` is not provided/provided as empty but `noop_with_empty_axes` is set to false (now supported, see [UPDATE] above). For ReduceMean opset version 13 and earlier: - Originally did not support the case where the `axes` attribute is not provided (now supported, see [UPDATE] above). (ONNX treats this as the default behavior of reducing all dimensions, and that case is not implemented by NNAPI.) https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a047fe95a35b27f45c05432b6ca18eb6c > 1: A 1-D Tensor of [ANEURALNETWORKS_TENSOR_INT32](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaf06d1affd33f3bc698d0c04eceb23298ac34965d8e76ac5acfddf5acd9e40f896). The dimensions to reduce. Must be in the range [-rank(input_tensor), rank(input_tensor)). NOTE: When the operation was introduced, the documentation incorrectly stated that if dimensions were empty, the operation would reduce across all dimensions. This behavior was never implemented. 
### Motivation and Context Fixes issue #16194 --------- Co-authored-by: rachguo --- .../nnapi/nnapi_builtin/builders/helper.cc | 14 ++ .../nnapi/nnapi_builtin/builders/helper.h | 4 + .../builders/impl/reduction_op_builder.cc | 205 ++++++++++++++++++ .../builders/impl/transpose_op_builder.cc | 2 +- .../builders/op_builder_factory.cc | 4 + .../builders/op_builder_factory.h | 1 + .../builders/op_builder_helpers.cc | 11 +- 7 files changed, 232 insertions(+), 9 deletions(-) create mode 100644 onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 60c7dca222..3209ad734f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -392,5 +392,19 @@ bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit return true; } +std::vector OnnxAxesToNnapi(gsl::span onnx_axes, std::optional input_rank) { + std::vector result; + result.reserve(onnx_axes.size()); + for (auto dim : onnx_axes) { + if (input_rank.has_value()) { + dim = HandleNegativeAxis(dim, *input_rank); + } + + result.push_back(narrow(dim)); + } + + return result; +} + } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 5241ebb1b7..421c55a2c9 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -172,5 +172,9 @@ inline uint32_t ShapeSize(const Shape& shape) { bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& input_name, const char* input_description); +// Convert ONNX int64 input to NNAPI int32 type input and optionally 
handle negative axis if needed +// Mostly used in handling `axes` input for now +std::vector OnnxAxesToNnapi(gsl::span onnx_axes, std::optional input_rank = std::nullopt); + } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc new file mode 100644 index 0000000000..618779f6d2 --- /dev/null +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include + +#include "core/common/logging/logging.h" +#include "core/common/safeint.h" +#include "core/framework/tensorprotoutils.h" +#include "core/graph/graph_viewer.h" +#include "core/optimizer/initializer.h" +#include "core/providers/common.h" +#include "core/providers/shared/utils/utils.h" +#include "core/providers/nnapi/nnapi_builtin/builders/helper.h" +#include "core/providers/nnapi/nnapi_builtin/builders/model_builder.h" +#include "core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h" +#include "core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h" +#include "core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.h" + +using namespace android::nn::wrapper; + +namespace onnxruntime { +namespace nnapi { + +using namespace op_builder_helpers; + +class ReductionOpBuilder : public BaseOpBuilder { + // Add operator related + public: + void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override; + + private: + Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override; + + // Operator support related + private: + int32_t GetMinSupportedNNAPIFeatureLevel(const NodeUnit& node_unit, + const OpSupportCheckParams& params) const override; + bool IsOpSupportedImpl(const InitializedTensorSet& 
initializers, const NodeUnit& node_unit, + const OpSupportCheckParams& params) const override; +}; + +// Add operator related + +void ReductionOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { + const auto& inputs = node_unit.Inputs(); + if (inputs.size() > 1 && inputs[1].node_arg.Exists()) { + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); + } +} + +Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { + const auto& op_type(node_unit.OpType()); + const auto& inputs = node_unit.Inputs(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); + + auto& shaper(model_builder.GetShaper()); + const auto input_shape = shaper[inputs[0].node_arg.Name()]; + const auto& operand_indices(model_builder.GetOperandIndices()); + const auto& operand_types(model_builder.GetOperandTypes()); + + NodeAttrHelper helper(node_unit); + + int32_t op_code; + if (op_type == "ReduceMean") { + op_code = ANEURALNETWORKS_MEAN; + } else { + // TODO: Add more reduction ops support + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "ReductionOpBuilder, unknown op: ", op_type); + } + + const bool keepdims = helper.Get("keepdims", 1) != 0; + const bool noop_with_empty_axes = helper.Get("noop_with_empty_axes", 0) != 0; + + // Get axes for ReduceMean + // Note: ONNX `ReduceMean` will reduce by default all dimensions if axes is not provided/provided as empty. However, NNAPI doesn't implement the behavior + // to reduce all dimensions by default when 'axes' is empty/not provided. We will convert the case by providing an input with all axes for NNAPI here. 
+ // Notes from NNAPI doc: + // https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a047fe95a35b27f45c05432b6ca18eb6c + std::vector axes; + if (node_unit.SinceVersion() >= 18) { + if (inputs.size() > 1 && inputs[1].node_arg.Exists()) { + // ReduceMean-18 uses the second optional input as axes + const auto& initializers(model_builder.GetInitializerTensors()); + const auto& axes_tensor = *initializers.at(inputs[1].node_arg.Name()); + Initializer unpacked_tensor(axes_tensor); + auto raw_axes = unpacked_tensor.DataAsSpan(); + axes = OnnxAxesToNnapi(raw_axes, input_shape.size()); + } + } else { + // For ReduceMean-13 or earlier, retrieve axes from the attribute + const auto axes_int64 = helper.Get("axes", std::vector{}); + axes = OnnxAxesToNnapi(axes_int64, input_shape.size()); + } + + if (axes.empty() && !noop_with_empty_axes) { + // we provide an input with all axes for NNAPI here to simulate this default behavior to reduce all dimensions + axes.resize(input_shape.size()); + std::iota(axes.begin(), axes.end(), 0); + } + + // Add ReduceMean operation + InlinedVector input_indices; + input_indices.push_back(operand_indices.at(inputs[0].node_arg.Name())); // data + + if (!axes.empty()) { + const auto axes_name = model_builder.GetUniqueName(node_unit.Name() + inputs[0].node_arg.Name() + "_axes"); + Shape axes_dimen = {static_cast(axes.size())}; + const OperandType axes_operand_type(Type::TENSOR_INT32, axes_dimen); + ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type)); + + input_indices.push_back(operand_indices.at(axes_name)); // axes + + int32_t input_size = static_cast(input_shape.size()); + + // Make output dimensions + InlinedVector output_dimen; + if (keepdims) { + output_dimen.reserve(input_size); + } else { + output_dimen.reserve(input_size - axes.size()); + } + + for (int32_t i = 0; i < input_size; i++) { + if 
(std::find(axes.begin(), axes.end(), i) == axes.end()) { + output_dimen.push_back(input_shape[i]); + } else { + if (keepdims) { + output_dimen.push_back(1); + } + } + } + + // In case of a tensor has all 1's in dimension such as {1,1,1,1} and gets all reduced, + // NNAPI requires the output shape to be {1}. (otherwise NNAPI will treat it as dynamic shape.) + if (output_dimen.empty()) + output_dimen.push_back(1); + + shaper.AddShape(output, output_dimen); + + ADD_SCALAR_OPERAND(model_builder, input_indices, keepdims ? 1 : 0); + + const OperandType output_operand_type(operand_types.at(inputs[0].node_arg.Name()).type, output_dimen); + ORT_RETURN_IF_ERROR(model_builder.AddOperation(op_code, input_indices, + {output}, {output_operand_type})); + } else { + // If `axes` is still empty at this point, meaning that it's ReduceMean-18 and attribute `noop_with_empty_axes` specifies as 1, + // treat as an Identity op here. + const OperandType output_operand_type(operand_types.at(inputs[0].node_arg.Name()).type, input_shape); + model_builder.RegisterOperand(output, operand_indices.at(inputs[0].node_arg.Name()), output_operand_type); + } + + return Status::OK(); +} + +// Operator support related + +int32_t ReductionOpBuilder::GetMinSupportedNNAPIFeatureLevel( + const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { + const auto& op(node_unit.OpType()); + if (op == "ReduceMean") { + return ANEURALNETWORKS_FEATURE_LEVEL_2; + } + + return ANEURALNETWORKS_FEATURE_LEVEL_3; +} + +bool ReductionOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const OpSupportCheckParams& /* params */) const { + const auto& inputs = node_unit.Inputs(); + const auto& op(node_unit.OpType()); + + Shape input_shape; + if (!GetShape(inputs[0].node_arg, input_shape)) + return false; + + if (input_shape.size() > 4 || input_shape.empty()) { + LOGS_DEFAULT(VERBOSE) << "NNAPI reduction ops only support 1-4d shape, input is " + << 
input_shape.size() << "d shape"; + return false; + } + + if (op == "ReduceMean") { + if (inputs.size() > 1 && inputs[1].node_arg.Exists()) { + const auto& axes_name = inputs[1].node_arg.Name(); + if (!Contains(initializers, axes_name)) { + LOGS_DEFAULT(VERBOSE) << "Axes of ReduceMean must be a constant initializer."; + return false; + } + } + } + + return true; +} + +void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { + CreateSharedOpBuilderImpl( + op_type, op_registrations, + { + // TODO: Add more reduction ops support + "ReduceMean", + }); +} + +} // namespace nnapi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc index 898c3f1872..4d243c730b 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc @@ -64,7 +64,7 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co const auto& input = node_unit.Inputs()[0].node_arg.Name(); const auto& output = node_unit.Outputs()[0].node_arg.Name(); NodeAttrHelper helper(node_unit); - std::vector perm = helper.Get("perm", std::vector()); + std::vector perm = helper.Get("perm", std::vector{}); auto input_dims = static_cast(shaper[input].size()); if (perm.empty()) { for (int32_t i = input_dims - 1; i >= 0; i--) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.cc index 0bcf84e6bf..3cf2fcb337 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.cc @@ -85,6 +85,10 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() { 
CreateMinMaxOpBuilder("Min", op_registrations); } + { + CreateReductionOpBuilder("ReduceMean", op_registrations); + } + return op_registrations; } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h index 37dfc003b6..41df542437 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h @@ -43,6 +43,7 @@ void CreateGemmOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_ void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateMinMaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); +void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc index 9832338c0c..cdcf2e4bd2 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc @@ -919,22 +919,17 @@ Status GetAxesForSqueezeAndUnSqueeze(ModelBuilder& model_builder, const NodeUnit if (node_unit.SinceVersion() > 12) { // For squeeze, axes is an optional input.If it is not supplied, return an empty axes as default to squeeze all // For unsqueeze, axes is a required input. 
This check has no effect for it - // TODO: Add helper function to handle the following conversion from int64 initializer to int32 if (node_unit.Inputs().size() > 1) { const auto& initializers(model_builder.GetInitializerTensors()); const auto& axes_tensor = *initializers.at(node_unit.Inputs()[1].node_arg.Name()); Initializer unpacked_tensor(axes_tensor); auto raw_axes = unpacked_tensor.DataAsSpan(); - const auto size = SafeInt(axes_tensor.dims()[0]); - axes.resize(size); - for (uint32_t i = 0; i < size; i++) { - // it is unlikely we have an axis value overflow for int32 - axes[i] = static_cast(raw_axes[i]); - } + axes = OnnxAxesToNnapi(raw_axes, std::nullopt); } } else { NodeAttrHelper helper(node_unit); - axes = helper.Get("axes", std::vector()); + const auto axes_int64 = helper.Get("axes", std::vector{}); + axes = OnnxAxesToNnapi(axes_int64, std::nullopt); } return Status::OK();