mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-04 04:07:22 +00:00
[NNAPI EP] Add ReduceMean Op support (#16294)
### Description <!-- Describe your changes. --> As title. Special cases for ReduceMean: [UPDATE] The following cases are supported now by converting to providing an input with all axes for NNAPI. Behaviors when axes is not provided or axes provided as an empty vector: For ReduceMean Opset version 18: - Support case `axes` is provided as empty with `noop_with_empty_axes` set to true. - Support case `axes` is not provided with `noop_with_empty_axes` set to true. All treat as identity op. - Does not support the case when `axes` is not provided/provided as empty but `noop_with_empty_axes` is set to false. For ReduceMean OpSet Version 13-: - Does not support when `axes` attribute is not provided. (as onnx treats it as default behavior to reduce all dimensions, and the case is not implemented by NNAPI.) https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a047fe95a35b27f45c05432b6ca18eb6c > 1: A 1-D Tensor of [ANEURALNETWORKS_TENSOR_INT32](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaf06d1affd33f3bc698d0c04eceb23298ac34965d8e76ac5acfddf5acd9e40f896). The dimensions to reduce. Must be in the range [-rank(input_tensor), rank(input_tensor)).NOTE: When the operation was introduced, the documentation incorrectly stated that if dimensions were empty, the operation would reduce across all dimensions. This behavior was never implemented. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Fixes issue #16194 --------- Co-authored-by: rachguo <rachguo@rachguos-Mini.attlocal.net>
This commit is contained in:
parent
d190db7fcd
commit
30bb0959dc
7 changed files with 232 additions and 9 deletions
|
|
@ -392,5 +392,19 @@ bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit
|
|||
return true;
|
||||
}
|
||||
|
||||
// Converts ONNX int64 axes into the int32 axes used on the NNAPI side.
// When `input_rank` holds a value, every axis is first passed through HandleNegativeAxis
// together with that rank; when it is empty, the values are only narrowed to int32.
std::vector<int32_t> OnnxAxesToNnapi(gsl::span<const int64_t> onnx_axes, std::optional<size_t> input_rank) {
  std::vector<int32_t> nnapi_axes;
  nnapi_axes.reserve(onnx_axes.size());

  const bool normalize = input_rank.has_value();
  for (const int64_t onnx_axis : onnx_axes) {
    const int64_t axis = normalize ? HandleNegativeAxis(onnx_axis, *input_rank) : onnx_axis;
    nnapi_axes.push_back(narrow<int32_t>(axis));
  }

  return nnapi_axes;
}
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -172,5 +172,9 @@ inline uint32_t ShapeSize(const Shape& shape) {
|
|||
bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::string& input_name, const char* input_description);
|
||||
|
||||
// Convert ONNX int64 input to NNAPI int32 type input and optionally handle negative axis if needed
|
||||
// Mostly used in handling `axes` input for now
|
||||
std::vector<int32_t> OnnxAxesToNnapi(gsl::span<const int64_t> onnx_axes, std::optional<size_t> input_rank = std::nullopt);
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -0,0 +1,205 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <onnx/onnx_pb.h>
|
||||
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "core/optimizer/initializer.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
#include "core/providers/nnapi/nnapi_builtin/builders/helper.h"
|
||||
#include "core/providers/nnapi/nnapi_builtin/builders/model_builder.h"
|
||||
#include "core/providers/nnapi/nnapi_builtin/builders/op_builder_factory.h"
|
||||
#include "core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h"
|
||||
#include "core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.h"
|
||||
|
||||
using namespace android::nn::wrapper;
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace nnapi {
|
||||
|
||||
using namespace op_builder_helpers;
|
||||
|
||||
class ReductionOpBuilder : public BaseOpBuilder {
|
||||
// Add operator related
|
||||
public:
|
||||
void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
|
||||
|
||||
private:
|
||||
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
|
||||
|
||||
// Operator support related
|
||||
private:
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
};
|
||||
|
||||
// Add operator related
|
||||
|
||||
// If the node has an existing second input, registers that input's name with the model
// builder as an initializer to skip. Nodes with a single input are left untouched.
void ReductionOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
  const auto& node_inputs = node_unit.Inputs();

  const bool has_second_input = node_inputs.size() > 1 && node_inputs[1].node_arg.Exists();
  if (!has_second_input) {
    return;
  }

  model_builder.AddInitializerToSkip(node_inputs[1].node_arg.Name());
}
|
||||
|
||||
// Adds the NNAPI operation for a reduction node to `model_builder`.
//
// Only "ReduceMean" is handled (mapped to ANEURALNETWORKS_MEAN); any other op type yields an
// INVALID_ARGUMENT status. The axes are read from the second input for opset >= 18 and from the
// `axes` attribute otherwise, and are normalized with the input rank via OnnxAxesToNnapi.
//
// - Empty axes with `noop_with_empty_axes` == 0: all input axes are reduced.
// - Empty axes with `noop_with_empty_axes` != 0: the output is registered as an alias of the
//   input operand (identity), and no NNAPI operation is added.
//
// Returns Status::OK() on success, or the first error propagated by ORT_RETURN_IF_ERROR.
Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
  const auto& op_type(node_unit.OpType());
  const auto& inputs = node_unit.Inputs();
  const auto& output = node_unit.Outputs()[0].node_arg.Name();

  auto& shaper(model_builder.GetShaper());
  const auto input_shape = shaper[inputs[0].node_arg.Name()];
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());

  NodeAttrHelper helper(node_unit);

  int32_t op_code;
  if (op_type == "ReduceMean") {
    op_code = ANEURALNETWORKS_MEAN;
  } else {
    // TODO: Add more reduction ops support
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "ReductionOpBuilder, unknown op: ", op_type);
  }

  const bool keepdims = helper.Get("keepdims", 1) != 0;
  const bool noop_with_empty_axes = helper.Get("noop_with_empty_axes", 0) != 0;

  // Get axes for ReduceMean
  // Note: ONNX `ReduceMean` reduces all dimensions by default if `axes` is not provided or is provided as empty.
  // NNAPI does not implement that default, so the case is converted below by passing NNAPI an explicit list
  // containing every axis.
  // Notes from NNAPI doc:
  // https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a047fe95a35b27f45c05432b6ca18eb6c
  std::vector<int32_t> axes;
  if (node_unit.SinceVersion() >= 18) {
    if (inputs.size() > 1 && inputs[1].node_arg.Exists()) {
      // ReduceMean-18 uses the second optional input as axes
      const auto& initializers(model_builder.GetInitializerTensors());
      const auto& axes_tensor = *initializers.at(inputs[1].node_arg.Name());
      Initializer unpacked_tensor(axes_tensor);
      auto raw_axes = unpacked_tensor.DataAsSpan<int64_t>();
      axes = OnnxAxesToNnapi(raw_axes, input_shape.size());
    }
  } else {
    // For ReduceMean-13 or earlier, retrieve axes from the attribute
    const auto axes_int64 = helper.Get("axes", std::vector<int64_t>{});
    axes = OnnxAxesToNnapi(axes_int64, input_shape.size());
  }

  if (axes.empty() && !noop_with_empty_axes) {
    // Provide an input with all axes for NNAPI to simulate the ONNX default of reducing all dimensions.
    axes.resize(input_shape.size());
    std::iota(axes.begin(), axes.end(), 0);
  }

  // Add ReduceMean operation
  InlinedVector<uint32_t> input_indices;
  input_indices.push_back(operand_indices.at(inputs[0].node_arg.Name()));  // data

  if (!axes.empty()) {
    const auto axes_name = model_builder.GetUniqueName(node_unit.Name() + inputs[0].node_arg.Name() + "_axes");
    Shape axes_dimen = {static_cast<uint32_t>(axes.size())};
    const OperandType axes_operand_type(Type::TENSOR_INT32, axes_dimen);
    ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));

    input_indices.push_back(operand_indices.at(axes_name));  // axes

    int32_t input_size = static_cast<int32_t>(input_shape.size());

    // Make output dimensions
    InlinedVector<uint32_t> output_dimen;
    // The output never has more dimensions than the input, so the input rank is a safe capacity.
    // Do not reserve `input_size - axes.size()`: that mixes signed and unsigned operands and wraps
    // around to a huge value when `axes` has more entries than the input rank (e.g. the same axis
    // listed twice after negative-axis normalization).
    output_dimen.reserve(input_shape.size());

    for (int32_t i = 0; i < input_size; i++) {
      if (std::find(axes.begin(), axes.end(), i) == axes.end()) {
        // Axis is not reduced: keep the input extent.
        output_dimen.push_back(input_shape[i]);
      } else {
        // Axis is reduced: it either collapses to 1 (keepdims) or disappears.
        if (keepdims) {
          output_dimen.push_back(1);
        }
      }
    }

    // In case a tensor has all 1's in its dimensions such as {1,1,1,1} and gets fully reduced,
    // NNAPI requires the output shape to be {1} (otherwise NNAPI will treat it as dynamic shape).
    if (output_dimen.empty())
      output_dimen.push_back(1);

    shaper.AddShape(output, output_dimen);

    // keepdims is the last operation input, after data and axes.
    ADD_SCALAR_OPERAND(model_builder, input_indices, keepdims ? 1 : 0);

    const OperandType output_operand_type(operand_types.at(inputs[0].node_arg.Name()).type, output_dimen);
    ORT_RETURN_IF_ERROR(model_builder.AddOperation(op_code, input_indices,
                                                   {output}, {output_operand_type}));
  } else {
    // If `axes` is still empty at this point, it is ReduceMean-18 with attribute `noop_with_empty_axes` set to 1;
    // treat it as an Identity op here by registering the output under the input's operand index.
    // NOTE(review): unlike the reduction path, no shape is added to the shaper for `output` here -
    // confirm whether downstream builders that look up `shaper[output]` rely on it.
    const OperandType output_operand_type(operand_types.at(inputs[0].node_arg.Name()).type, input_shape);
    model_builder.RegisterOperand(output, operand_indices.at(inputs[0].node_arg.Name()), output_operand_type);
  }

  return Status::OK();
}
|
||||
|
||||
// Operator support related
|
||||
|
||||
int32_t ReductionOpBuilder::GetMinSupportedNNAPIFeatureLevel(
|
||||
const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const {
|
||||
const auto& op(node_unit.OpType());
|
||||
if (op == "ReduceMean") {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_3;
|
||||
}
|
||||
|
||||
bool ReductionOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
const auto& op(node_unit.OpType());
|
||||
|
||||
Shape input_shape;
|
||||
if (!GetShape(inputs[0].node_arg, input_shape))
|
||||
return false;
|
||||
|
||||
if (input_shape.size() > 4 || input_shape.empty()) {
|
||||
LOGS_DEFAULT(VERBOSE) << "NNAPI reduction ops only support 1-4d shape, input is "
|
||||
<< input_shape.size() << "d shape";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (op == "ReduceMean") {
|
||||
if (inputs.size() > 1 && inputs[1].node_arg.Exists()) {
|
||||
const auto& axes_name = inputs[1].node_arg.Name();
|
||||
if (!Contains(initializers, axes_name)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Axes of ReduceMean must be a constant initializer.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forwards `op_type` and `op_registrations` to CreateSharedOpBuilderImpl for ReductionOpBuilder,
// together with the list of op types handled by this builder.
void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
  // TODO: Add more reduction ops support
  CreateSharedOpBuilderImpl<ReductionOpBuilder>(op_type, op_registrations, {"ReduceMean"});
}
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -64,7 +64,7 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co
|
|||
const auto& input = node_unit.Inputs()[0].node_arg.Name();
|
||||
const auto& output = node_unit.Outputs()[0].node_arg.Name();
|
||||
NodeAttrHelper helper(node_unit);
|
||||
std::vector<int32_t> perm = helper.Get("perm", std::vector<int32_t>());
|
||||
std::vector<int32_t> perm = helper.Get("perm", std::vector<int32_t>{});
|
||||
auto input_dims = static_cast<int32_t>(shaper[input].size());
|
||||
if (perm.empty()) {
|
||||
for (int32_t i = input_dims - 1; i >= 0; i--)
|
||||
|
|
|
|||
|
|
@ -85,6 +85,10 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
|
|||
CreateMinMaxOpBuilder("Min", op_registrations);
|
||||
}
|
||||
|
||||
{
|
||||
CreateReductionOpBuilder("ReduceMean", op_registrations);
|
||||
}
|
||||
|
||||
return op_registrations;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ void CreateGemmOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_
|
|||
void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
void CreateMinMaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -919,22 +919,17 @@ Status GetAxesForSqueezeAndUnSqueeze(ModelBuilder& model_builder, const NodeUnit
|
|||
if (node_unit.SinceVersion() > 12) {
|
||||
// For squeeze, axes is an optional input. If it is not supplied, return an empty axes as default to squeeze all
|
||||
// For unsqueeze, axes is a required input. This check has no effect for it
|
||||
// TODO: Add helper function to handle the following conversion from int64 initializer to int32
|
||||
if (node_unit.Inputs().size() > 1) {
|
||||
const auto& initializers(model_builder.GetInitializerTensors());
|
||||
const auto& axes_tensor = *initializers.at(node_unit.Inputs()[1].node_arg.Name());
|
||||
Initializer unpacked_tensor(axes_tensor);
|
||||
auto raw_axes = unpacked_tensor.DataAsSpan<int64_t>();
|
||||
const auto size = SafeInt<uint32_t>(axes_tensor.dims()[0]);
|
||||
axes.resize(size);
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
// it is unlikely we have an axis value overflow for int32
|
||||
axes[i] = static_cast<int32_t>(raw_axes[i]);
|
||||
}
|
||||
axes = OnnxAxesToNnapi(raw_axes, std::nullopt);
|
||||
}
|
||||
} else {
|
||||
NodeAttrHelper helper(node_unit);
|
||||
axes = helper.Get("axes", std::vector<int32_t>());
|
||||
const auto axes_int64 = helper.Get("axes", std::vector<int64_t>{});
|
||||
axes = OnnxAxesToNnapi(axes_int64, std::nullopt);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
|||
Loading…
Reference in a new issue