[NNAPI EP] Enable Slice support (#8031)

* Enable slice for NNAPI EP

* Add ANEURALNETWORKS_STRIDED_SLICE support

* Addressed CR comments

* Addressed CR comments, rename PrepareForCompute to PrepareForComputeHelper to avoid confusion
This commit is contained in:
Guoyu Wang 2021-06-17 12:36:12 -07:00 committed by GitHub
parent 96989b83ee
commit d83f7fd4aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 543 additions and 225 deletions

View file

@ -8,6 +8,7 @@
#include "core/framework/element_type_lists.h"
#include "core/providers/common.h"
#include "core/providers/cpu/tensor/slice_helper.h"
#include "core/providers/cpu/tensor/utils.h"
#include "core/providers/op_kernel_type_control.h"
#include "core/providers/op_kernel_type_control_utils.h"
@ -44,14 +45,6 @@ const auto data_type_constraints = BuildKernelDefConstraintsFromTypeList<DataTyp
const auto indices_type_constraints = BuildKernelDefConstraintsFromTypeList<IndicesTypes>();
const auto enabled_data_type_constraints = BuildKernelDefConstraintsFromTypeList<EnabledDataTypes>();
const auto enabled_indices_type_constraints = BuildKernelDefConstraintsFromTypeList<EnabledIndicesTypes>();
// Local replacement for std::clamp, which is only available from C++17 onwards.
// Yields a reference to lo when v < lo, to hi when v > hi, and to v itself otherwise.
template <typename T>
const T& clamp(const T& v, const T& lo, const T& hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
} // namespace
ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
@ -93,6 +86,7 @@ ONNX_CPU_OPERATOR_KERNEL(
static void FlattenOutputDims(const std::vector<int64_t>& input_dimensions,
const std::vector<int64_t>& output_dims,
std::vector<int64_t>& starts,
std::vector<int64_t>& ends,
std::vector<int64_t>& steps,
std::vector<int64_t>*& flattened_output_dims) {
int num_to_combine = 0;
@ -120,6 +114,10 @@ static void FlattenOutputDims(const std::vector<int64_t>& input_dimensions,
// so we can just shrink via resize so the number of entries matches flattened_output_dims
starts.resize(num_dims);
steps.resize(num_dims);
// update ends as well
ends.resize(num_dims);
ends.back() = dim_value;
} else {
flattened_output_dims = nullptr;
}
@ -130,47 +128,9 @@ Status SliceBase::PrepareForCompute(const std::vector<int64_t>& raw_starts,
const std::vector<int64_t>& raw_ends,
const std::vector<int64_t>& raw_axes,
SliceOp::PrepareForComputeMetadata& compute_metadata) {
// Initialize axes to the provided axes attribute or to the default sequence
std::vector<int64_t> axes(raw_axes);
if (axes.empty()) {
//axes are omitted, they are set to[0, ..., ndim - 1]
axes.resize(compute_metadata.starts_.size());
std::iota(axes.begin(), axes.end(), 0);
}
// Iterate through the provided axes and override the start/end ranges
std::unordered_set<int64_t> unique_axes;
const auto& dimension_count = compute_metadata.input_dimensions_.size();
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
auto axis = HandleNegativeAxis(axes[axis_index], dimension_count); // handle negative and enforce axis is valid
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
if (unique_axes.find(axis) != unique_axes.end())
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates");
unique_axes.insert(axis);
// process start
auto start = raw_starts[axis_index];
if (start < 0)
start += compute_metadata.input_dimensions_[axis];
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
// process end
auto end = raw_ends[axis_index];
if (end < 0)
end += compute_metadata.input_dimensions_[axis];
// find output dim value for this axis
auto temp = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]) - compute_metadata.starts_[axis];
if (temp < 0)
compute_metadata.output_dims_[axis] = 0;
else
compute_metadata.output_dims_[axis] = temp;
}
ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, compute_metadata));
FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
return Status::OK();
}
@ -180,70 +140,9 @@ Status SliceBase::PrepareForCompute(const std::vector<int64_t>& raw_starts,
const std::vector<int64_t>& raw_axes,
const std::vector<int64_t>& raw_steps,
SliceOp::PrepareForComputeMetadata& compute_metadata) {
// Initialize axes to the provided axes attribute or to the default sequence
std::vector<int64_t> axes(raw_axes);
if (axes.empty()) {
// axes are omitted, they are set to[0, ..., ndim - 1]
axes.resize(compute_metadata.starts_.size());
std::iota(axes.begin(), axes.end(), 0);
}
// Iterate through the provided axes and override the start/end/steps ranges
std::unordered_set<int64_t> unique_axes;
const auto& dimension_count = compute_metadata.input_dimensions_.size();
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast<int64_t>(dimension_count) : axes[axis_index];
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
if (unique_axes.find(axis) != unique_axes.end())
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates");
unique_axes.insert(axis);
// process step
auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1;
if (step == 0)
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'step' value cannot be 0");
compute_metadata.steps_[axis] = step;
// process start
auto start = raw_starts[axis_index];
if (start < 0)
start += compute_metadata.input_dimensions_[axis];
if (step < 0)
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis] - 1);
else
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
// process end
auto end = raw_ends[axis_index];
// INT_MAX has a special meaning for end according to spec
// equivalent to 'None' in numpy
// it represent slicing to the end of the dimension
if (end == std::numeric_limits<int32_t>::max() ||
end == std::numeric_limits<int64_t>::max()) {
end = step < 0 ? -1 : compute_metadata.input_dimensions_[axis];
}
else {
if (end < 0)
end += compute_metadata.input_dimensions_[axis];
if (step < 0)
end = clamp(end, int64_t{-1}, compute_metadata.input_dimensions_[axis]);
else
end = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]);
}
// find output dim value for this axis
auto temp = static_cast<int64_t>(ceil(1.0 * (end - compute_metadata.starts_[axis]) / step));
if (temp < 0)
compute_metadata.output_dims_[axis] = 0;
else
compute_metadata.output_dims_[axis] = temp;
}
ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata));
FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
return Status::OK();
}

View file

@ -1,5 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#ifndef SHARED_PROVIDER
#include "core/common/common.h"
@ -7,28 +8,10 @@
#include "core/util/math_cpuonly.h"
#endif
#include "core/providers/cpu/tensor/slice_compute_metadata.h"
namespace onnxruntime {
namespace SliceOp {
// Per-call working state for a Slice computation.
// input_dimensions_ is held by reference, so the vector handed to the constructor
// has to stay alive for as long as this object is used.
struct PrepareForComputeMetadata {
  PrepareForComputeMetadata() = delete;

  // Starts default to 0 and steps to 1 for every axis; the output shape starts out
  // equal to the input shape.
  PrepareForComputeMetadata(const std::vector<int64_t>& input_dimensions)
      : input_dimensions_(input_dimensions),
        starts_(input_dimensions.size(), int64_t{0}),
        steps_(input_dimensions.size(), int64_t{1}),
        output_dims_(input_dimensions) {}

  const std::vector<int64_t>& input_dimensions_;
  std::vector<int64_t> starts_;
  std::vector<int64_t> steps_;
  std::vector<int64_t> output_dims_;
  std::vector<int64_t> flattened_output_dims_;
  // Initially points at flattened_output_dims_ above.
  std::vector<int64_t>* p_flattened_output_dims_ = &flattened_output_dims_;
};
} // namespace SliceOp
class SliceBase {
// static methods that can be used from other ops if needed
public:

View file

@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// This file contains the definition of the PrepareForComputeMetadata for Slice operator
#pragma once
#include <cstdint>
#include <vector>
namespace onnxruntime {
namespace SliceOp {
// Per-call working state for a Slice computation: the normalized starts/ends/steps
// for every input axis together with the resulting output shape.
// input_dimensions_ is held by reference, so the vector handed to the constructor
// has to stay alive for as long as this object is used.
struct PrepareForComputeMetadata {
  // Defaults describe a full copy of the input: start 0, end = dimension size and
  // step 1 on every axis, with the output shape equal to the input shape.
  explicit PrepareForComputeMetadata(const std::vector<int64_t>& input_dimensions)
      : input_dimensions_(input_dimensions),
        starts_(input_dimensions.size(), int64_t{0}),
        ends_(input_dimensions),
        steps_(input_dimensions.size(), int64_t{1}),
        output_dims_(input_dimensions) {}

  const std::vector<int64_t>& input_dimensions_;
  std::vector<int64_t> starts_;
  std::vector<int64_t> ends_;
  std::vector<int64_t> steps_;
  std::vector<int64_t> output_dims_;
  std::vector<int64_t> flattened_output_dims_;
  // Initially points at flattened_output_dims_ above.
  std::vector<int64_t>* p_flattened_output_dims_ = &flattened_output_dims_;
};
} // namespace SliceOp
} // namespace onnxruntime

View file

@ -0,0 +1,141 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// This file contains the functions that compute the starts, ends, steps (strides) and output shape
// for the Slice op, which can be called from other ops or EPs.
#pragma once
#include "core/providers/cpu/tensor/slice_compute_metadata.h"
namespace onnxruntime {
// std::clamp is a C++17 addition, so a local equivalent is provided here.
// Returns a reference to lo if v < lo, to hi if v > hi, and to v in every other case.
template <typename T>
const T& clamp(const T& v, const T& lo, const T& hi) {
  if (!(v < lo)) {
    return (v > hi) ? hi : v;
  }
  return lo;
}
namespace SliceOp {
// compute output_dims without steps (Slice V1-9 & DynamicSlice)
// Please note this will not Flatten the output shape
//
// For every axis being sliced, normalizes the raw start/end (negative values count from the
// end of the axis, results are clamped to [0, dim]) and stores the per-axis starts_, ends_
// and output_dims_ in compute_metadata. Axes that are not sliced keep whatever values
// compute_metadata already holds.
//
// When raw_axes is empty the slice applies to the leading axes [0, ..., k - 1], where k is the
// number of entries in raw_starts, capped at the number of input dimensions.
//
// Returns INVALID_ARGUMENT when an axis is out of range or duplicated, or when raw_starts /
// raw_ends do not provide a value for every axis being sliced.
inline Status PrepareForComputeHelper(const std::vector<int64_t>& raw_starts,
                                      const std::vector<int64_t>& raw_ends,
                                      const std::vector<int64_t>& raw_axes,
                                      SliceOp::PrepareForComputeMetadata& compute_metadata) {
  // Initialize axes to the provided axes attribute or to the default sequence
  std::vector<int64_t> axes(raw_axes);
  if (axes.empty()) {
    // axes are omitted: one default axis per provided start, never more than ndim.
    // Sizing by ndim alone would read past the end of raw_starts/raw_ends when they are shorter.
    const size_t rank = compute_metadata.starts_.size();
    axes.resize(raw_starts.size() < rank ? raw_starts.size() : rank);
    std::iota(axes.begin(), axes.end(), 0);
  }

  // raw_starts/raw_ends are indexed by position in axes below, so both must be long enough
  const size_t axes_count = axes.size();
  if (raw_starts.size() < axes_count || raw_ends.size() < axes_count)
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "'starts' and 'ends' must provide a value for every axis being sliced");

  // Iterate through the provided axes and override the start/end ranges
  std::unordered_set<int64_t> unique_axes;
  const auto dimension_count = compute_metadata.input_dimensions_.size();
  for (size_t axis_index = 0; axis_index < axes_count; ++axis_index) {
    auto axis = HandleNegativeAxis(axes[axis_index], dimension_count);  // handle negative and enforce axis is valid
    if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
    // insert() reports whether the axis was already present, so no separate lookup is needed
    if (!unique_axes.insert(axis).second)
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates");

    const int64_t dim = compute_metadata.input_dimensions_[axis];

    // process start
    auto start = raw_starts[axis_index];
    if (start < 0)
      start += dim;
    compute_metadata.starts_[axis] = clamp(start, int64_t{0}, dim);

    // process end
    auto end = raw_ends[axis_index];
    if (end < 0)
      end += dim;
    compute_metadata.ends_[axis] = clamp(end, int64_t{0}, dim);

    // find output dim value for this axis, an end before the start yields an empty axis
    const auto extent = compute_metadata.ends_[axis] - compute_metadata.starts_[axis];
    compute_metadata.output_dims_[axis] = extent < 0 ? 0 : extent;
  }
  return Status::OK();
}
// compute output_dims with steps (Slice V10)
// Please note this will not Flatten the output shape
//
// For every axis being sliced, records the step, normalizes the raw start/end (negative
// values count from the end of the axis, results are clamped to a range that depends on the
// sign of the step) and stores the per-axis starts_, ends_, steps_ and output_dims_ in
// compute_metadata. Axes that are not sliced keep whatever values compute_metadata already
// holds. A missing entry in raw_steps is treated as a step of 1.
//
// When raw_axes is empty the slice applies to the leading axes [0, ..., k - 1], where k is the
// number of entries in raw_starts, capped at the number of input dimensions.
//
// Returns INVALID_ARGUMENT when an axis is out of range or duplicated, when a step is 0, or
// when raw_starts / raw_ends do not provide a value for every axis being sliced.
inline Status PrepareForComputeHelper(const std::vector<int64_t>& raw_starts,
                                      const std::vector<int64_t>& raw_ends,
                                      const std::vector<int64_t>& raw_axes,
                                      const std::vector<int64_t>& raw_steps,
                                      SliceOp::PrepareForComputeMetadata& compute_metadata) {
  // Initialize axes to the provided axes attribute or to the default sequence
  std::vector<int64_t> axes(raw_axes);
  if (axes.empty()) {
    // axes are omitted: one default axis per provided start, never more than ndim.
    // Sizing by ndim alone would read past the end of raw_starts/raw_ends when they are shorter.
    const size_t rank = compute_metadata.starts_.size();
    axes.resize(raw_starts.size() < rank ? raw_starts.size() : rank);
    std::iota(axes.begin(), axes.end(), 0);
  }

  // raw_starts/raw_ends are indexed by position in axes below, so both must be long enough
  const size_t axes_count = axes.size();
  if (raw_starts.size() < axes_count || raw_ends.size() < axes_count)
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "'starts' and 'ends' must provide a value for every axis being sliced");

  // Iterate through the provided axes and override the start/end/steps ranges
  std::unordered_set<int64_t> unique_axes;
  const auto dimension_count = compute_metadata.input_dimensions_.size();
  for (size_t axis_index = 0; axis_index < axes_count; ++axis_index) {
    auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast<int64_t>(dimension_count) : axes[axis_index];
    if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
    // insert() reports whether the axis was already present, so no separate lookup is needed
    if (!unique_axes.insert(axis).second)
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates");

    const int64_t dim = compute_metadata.input_dimensions_[axis];

    // process step
    auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1;
    if (step == 0)
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'step' value cannot be 0");
    compute_metadata.steps_[axis] = step;

    // process start
    auto start = raw_starts[axis_index];
    if (start < 0)
      start += dim;
    // when stepping backwards the first element read is at most the last valid index
    if (step < 0)
      compute_metadata.starts_[axis] = clamp(start, int64_t{0}, dim - 1);
    else
      compute_metadata.starts_[axis] = clamp(start, int64_t{0}, dim);

    // process end
    auto end = raw_ends[axis_index];
    // INT_MAX has a special meaning for end according to spec
    // equivalent to 'None' in numpy
    // it represent slicing to the end of the dimension
    if (end == std::numeric_limits<int32_t>::max() ||
        end == std::numeric_limits<int64_t>::max()) {
      end = step < 0 ? -1 : dim;
    } else {
      if (end < 0)
        end += dim;
      // -1 is a valid end when stepping backwards: it means "one before index 0"
      if (step < 0)
        end = clamp(end, int64_t{-1}, dim);
      else
        end = clamp(end, int64_t{0}, dim);
    }
    compute_metadata.ends_[axis] = end;

    // find output dim value for this axis, a range that runs against the step direction is empty
    const auto extent =
        static_cast<int64_t>(ceil(1.0 * (compute_metadata.ends_[axis] - compute_metadata.starts_[axis]) / step));
    compute_metadata.output_dims_[axis] = extent < 0 ? 0 : extent;
  }
  return Status::OK();
}
} // namespace SliceOp
} // namespace onnxruntime

View file

@ -539,8 +539,8 @@ std::string Shape2String(const std::vector<uint32_t>& shape) {
return os.str();
}
bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
size_t input_idx, const char* input_name) {
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
size_t input_idx, const char* input_name) {
if (!Contains(initializers, node.InputDefs()[input_idx]->Name())) {
LOGS_DEFAULT(VERBOSE) << input_name << " of " << node.OpType() << " must be an initializer tensor";
return false;

View file

@ -133,8 +133,8 @@ std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_view
std::string Shape2String(const std::vector<uint32_t>& shape);
// Check the given input is an initializer tensor
bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
} // namespace nnapi
} // namespace onnxruntime

View file

@ -8,6 +8,7 @@
#include <onnx/onnx_pb.h>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/cpu/tensor/slice_helper.h"
#include "helper.h"
#include "model_builder.h"
#include "op_builder.h"
@ -2536,7 +2537,6 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
#pragma region op_elu
class EluOpBuilder : public BaseOpBuilder {
public:
private:
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
};
@ -2561,6 +2561,179 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
#pragma endregion
#pragma region op_slice
// Op builder that adds an ONNX Slice node to the NNAPI model.
class SliceOpBuilder : public BaseOpBuilder {
public:
// Registers the non-data inputs of the Slice node (inputs 1 and up) as initializers to skip.
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
private:
// Computes the slice parameters and output shape, then adds the NNAPI slice operation.
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
};
// Marks every input of the Slice node other than input 0 as an initializer to skip.
// Those inputs are starts, ends and the optional axes and steps.
void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
  // Skip everything except input0 for Slice
  const auto input_defs = node.InputDefs();
  // Bounding the loop by the actual input count avoids indexing past the end of input_defs
  // for a node that carries fewer than three inputs.
  for (size_t input_idx = 1; input_idx < input_defs.size(); ++input_idx) {
    model_builder.AddInitializerToSkip(input_defs[input_idx]->Name());  // starts, ends, axes, steps
  }
}
// Adds the NNAPI operation for an ONNX Slice node.
// The starts/ends/axes/steps inputs are read from the initializers, normalized to one entry per
// input axis through SliceOp::PrepareForComputeHelper, and then emitted either as
// ANEURALNETWORKS_SLICE (all steps are 1 and the feature level is above 2) or as
// ANEURALNETWORKS_STRIDED_SLICE. The output shape is computed here and registered with the shaper.
// Returns the first error reported by any of the helper calls, otherwise the result of AddOperation.
Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const {
auto& shaper(model_builder.GetShaper());
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
const auto input_defs = node.InputDefs();
const auto& input_shape = shaper[input_defs[0]->Name()];
// compute_metadata keeps a reference to input_shape_64, so this vector must outlive it
std::vector<int64_t> input_shape_64(input_shape.cbegin(), input_shape.cend());
SliceOp::PrepareForComputeMetadata compute_metadata(input_shape_64);
{
// We need to copy the data from the starts/ends/axes/steps initializers to int64 vectors
// to be used in the shared PrepareForComputeHelper function to calculate the output shape
// and normalize inputs, for example, input can be starts/ends/steps for certain axes,
// PrepareForComputeHelper can generate standard starts/ends/steps for each axis
std::vector<int64_t> input_starts;
std::vector<int64_t> input_ends;
std::vector<int64_t> input_axes;
std::vector<int64_t> input_steps;
// Reads the initializer behind input input_idx into data as int64 values.
// Leaves data empty when the node has no input at that index.
const auto CopyInputData = [&node, &model_builder](size_t input_idx, std::vector<int64_t>& data) {
data.clear();
const auto input_defs = node.InputDefs();
// This is an optional input, return empty vector
if (input_defs.size() <= input_idx)
return Status::OK();
const auto& input_name = input_defs[input_idx]->Name();
const auto& initializers(model_builder.GetInitializerTensors());
// NOTE(review): at() presumably relies on an earlier support check having verified that this
// input is an initializer (and is not an omitted optional input with an empty name) - confirm
const auto& tensor = *initializers.at(input_name);
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
ORT_RETURN_IF_ERROR(
onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(),
unpacked_tensor, tensor_byte_size));
const auto data_type = tensor.data_type();
// Only int64 and int32 initializers are accepted, both are widened/copied into int64
if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
const int64_t* tensor_data = reinterpret_cast<const int64_t*>(unpacked_tensor.get());
size_t size = tensor_byte_size / sizeof(int64_t);
data.insert(data.end(), tensor_data, tensor_data + size);
} else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
const int32_t* tensor_data = reinterpret_cast<const int32_t*>(unpacked_tensor.get());
size_t size = tensor_byte_size / sizeof(int32_t);
data.insert(data.end(), tensor_data, tensor_data + size);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"Data type for starts and ends inputs' is not supported in this build. Got ",
data_type);
}
return Status::OK();
};
ORT_RETURN_IF_ERROR(CopyInputData(1, input_starts));
ORT_RETURN_IF_ERROR(CopyInputData(2, input_ends));
ORT_RETURN_IF_ERROR(CopyInputData(3, input_axes));
ORT_RETURN_IF_ERROR(CopyInputData(4, input_steps));
ORT_RETURN_IF_ERROR(
SliceOp::PrepareForComputeHelper(input_starts, input_ends, input_axes, input_steps, compute_metadata));
}
// output shape is of type uint32_t, convert from int64 compute_metadata.output_dims_
Shape nnapi_output_shape;
nnapi_output_shape.reserve(compute_metadata.output_dims_.size());
std::transform(compute_metadata.output_dims_.cbegin(), compute_metadata.output_dims_.cend(),
std::back_inserter(nnapi_output_shape),
[](int64_t i) { return SafeInt<uint32_t>(i); });
const auto& input = node.InputDefs()[0]->Name();
const auto& output = node.OutputDefs()[0]->Name();
// the output is flagged NHWC exactly when the input operand is
bool output_is_nhwc = model_builder.IsOperandNHWC(input);
// No shape inference for Slice, everything is calculated here, we only need to add the output shape
// to the shaper
shaper.AddShape(output, nnapi_output_shape);
// the output operand reuses the input operand's element type with the new shape
const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
// input_indices is built up in the operand order passed to AddOperation, starting with the data input
std::vector<uint32_t> input_indices;
input_indices.push_back(operand_indices.at(input));
// begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE have the same shape
Shape param_dimen = {static_cast<uint32_t>(input_shape.size())};
// helper function to add begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE
// converts the int64 values to int32, adds them as a TENSOR_INT32 operand under a unique name
// derived from the node name plus the given suffix, and appends the new operand index to input_indices
const auto AddOperand = [&model_builder, &node, &input_indices, &operand_indices](
const char* name, const Shape& shape, const std::vector<int64_t>& param_raw_data) {
std::vector<int32_t> param_data;
param_data.reserve(param_raw_data.size());
std::transform(param_raw_data.cbegin(), param_raw_data.cend(),
std::back_inserter(param_data),
[](int64_t i) { return SafeInt<int32_t>(i); });
std::string param_name = model_builder.GetUniqueName(node.Name() + name);
OperandType param_operand_type(Type::TENSOR_INT32, shape);
// NOTE(review): param_data is local to this lambda - presumably the model builder copies the
// buffer before returning; confirm against AddOperandFromPersistMemoryBuffer
ORT_RETURN_IF_ERROR(
model_builder.AddOperandFromPersistMemoryBuffer(param_name, param_data.data(), param_operand_type));
input_indices.push_back(operand_indices.at(param_name));
return Status::OK();
};
ORT_RETURN_IF_ERROR(AddOperand("starts", param_dimen, compute_metadata.starts_)); //nnapi_begin
// NNAPI has 2 slice operations
// - ANEURALNETWORKS_SLICE
// Simpler and faster version of slice without steps, available from ANEURALNETWORKS_FEATURE_LEVEL_3
// Use this one if no step other than 1 is used in ONNX slice
// - ANEURALNETWORKS_STRIDED_SLICE
// More comprehensive version, available from ANEURALNETWORKS_FEATURE_LEVEL_2
int op_code = ANEURALNETWORKS_STRIDED_SLICE;
if (std::all_of(compute_metadata.steps_.cbegin(),
compute_metadata.steps_.cend(),
[](int64_t i) { return i == 1; }) &&
model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
op_code = ANEURALNETWORKS_SLICE;
// the nnapi size of the slice in this case is the output shape
ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_)); //nnapi_sizes
} else {
// ** The special treatment of ends **
// The nnapi_end needs some special handling, based on the current undocumented design of
// ANEURALNETWORKS_STRIDED_SLICE
// For ORT, for a single axis, after SliceOp::PrepareForComputeHelper, if the step is negative
// and the last element for slice is at the beginning of the axis (we are slicing backwards),
// the end for this axis will be -1
// For NNAPI, it is not documented that end can be negative,
// see https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a89695302f8b1e7ae7ce8f4d8c0b8a752
// However, the actual NNAPI StridedSlice has some odd implementations,
// See https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/operations/StridedSlice.cpp#177
// and, https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/include/OperationsUtils.h#262
// If a negative end is no less than -dim (dimension of the axis), it will be treated as an index counting from
// the end, for example, dim = 5, and end = -1, the end will be normalized to 4, which will cause
// incorrect result, so here we have to make the end = -dim - 1 such that it will not be treated as
// an index counting from the end.
std::vector<int64_t> ends = compute_metadata.ends_;
for (size_t i = 0; i < ends.size(); ++i) {
if (ends[i] == -1) {
// rewrite -1 to -(dim + 1) for this axis, see the explanation above
ends[i] = -static_cast<int32_t>(input_shape[i] + 1);
}
}
ORT_RETURN_IF_ERROR(AddOperand("ends", param_dimen, ends)); //nnapi_end
ORT_RETURN_IF_ERROR(AddOperand("steps", param_dimen, compute_metadata.steps_)); //nnapi_strides
// We do not use the following inputs in ANEURALNETWORKS_STRIDED_SLICE, set them all to 0
ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // begin_mask
ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // end_mask
ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // shrink_axis_mask
}
return model_builder.AddOperation(op_code, input_indices, {output}, {output_operand_type}, {output_is_nhwc});
}
#pragma endregion
#pragma region CreateGetOpBuilders
// The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used
@ -2579,24 +2752,39 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
static OpBuilderRegistrations CreateOpBuilderRegistrations() {
OpBuilderRegistrations op_registrations;
// Builders handle a single op
NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Slice", SliceOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder);
// Builders shared among similar ops
{
NNAPI_EP_ADD_SHARED_OP_BUILDER("Add", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Div", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Pow", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder);
}
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder);
{
NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalAveragePool", PoolOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalMaxPool", PoolOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("MaxPool", PoolOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAveragePool", PoolOpBuilder);
}
@ -2606,10 +2794,6 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearConv", ConvOpBuilder);
}
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder);
{
NNAPI_EP_ADD_SHARED_OP_BUILDER("Gemm", GemmOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("MatMul", GemmOpBuilder);
@ -2621,30 +2805,19 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
NNAPI_EP_ADD_SHARED_OP_BUILDER("Exp", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Floor", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Log", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Neg", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sin", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sqrt", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Tanh", UnaryOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder);
}
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder);
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder);
{
NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Max", MinMaxOpBuilder);
NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder);
}
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder);
return op_registrations;
}

View file

@ -76,7 +76,8 @@ class BaseOpSupportChecker : public IOpSupportChecker {
return true;
}
virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const {
virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const {
// ANEURALNETWORKS_FEATURE_LEVEL_1 is the baseline version of NNAPI,
// There is no NNAPI support for Android API level 26-
return ANEURALNETWORKS_FEATURE_LEVEL_1;
@ -319,7 +320,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_2;
}
@ -490,7 +492,8 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& params) const override {
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
}
@ -667,7 +670,8 @@ class ConvOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& params) const override {
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
}
@ -775,7 +779,8 @@ class CastOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_3;
}
@ -805,7 +810,8 @@ class SoftMaxOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_2;
}
};
@ -1043,7 +1049,8 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node,
const OpSupportCheckParams& params) const override;
bool HasSupportedInputsImpl(const Node& node) const override;
@ -1079,8 +1086,8 @@ bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initia
return true;
}
int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(
const Node& node, const OpSupportCheckParams& /* params */) const {
int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node,
const OpSupportCheckParams& /* params */) const {
const auto& op(node.OpType());
if (op == "Abs" ||
op == "Exp" ||
@ -1216,7 +1223,8 @@ class SqueezeOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_2;
}
};
@ -1255,7 +1263,8 @@ class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_3;
}
};
@ -1296,7 +1305,8 @@ class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_1;
}
bool HasSupportedInputsImpl(const Node& node) const override;
@ -1340,7 +1350,8 @@ class LRNOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_2;
}
};
@ -1397,7 +1408,8 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& params) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override;
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override;
// Resize opset 10- is very different than Resize opset 11+, with many key attributes missing
// We only support Resize opset 11+ here
@ -1516,7 +1528,8 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
return true;
}
int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& /* params */) const {
int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node,
const OpSupportCheckParams& /* params */) const {
int32_t input_type;
// This should not happen, but if it happens make sure this will require an impossible version
@ -1590,7 +1603,8 @@ class MinMaxOpSupportChecker : public BaseOpSupportChecker {
const std::string& op_type, OpSupportCheckerRegistrations& op_registrations);
private:
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_3;
}
@ -1629,7 +1643,8 @@ bool MinMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in
class EluOpSupportChecker : public BaseOpSupportChecker {
private:
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
const OpSupportCheckParams& /* params */) const override {
return ANEURALNETWORKS_FEATURE_LEVEL_4;
}
@ -1639,6 +1654,63 @@ class EluOpSupportChecker : public BaseOpSupportChecker {
#pragma endregion
#pragma region op_slice
// Support checker for the ONNX Slice op on the NNAPI execution provider.
// The node-specific validation lives in IsOpSupportedImpl; the two inline
// overrides below only report fixed version requirements.
class SliceOpSupportChecker : public BaseOpSupportChecker {
 private:
  // Node-level validation of the Slice inputs (defined out of line).
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

  // Slice is only supported starting from opset 10.
  int GetMinSupportedOpSet(const Node& /* node */) const override {
    return 10;
  }

  // Minimum NNAPI feature level reported for Slice, independent of node and params.
  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
                                           const OpSupportCheckParams& /* params */) const override {
    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }
};
// Decides whether a Slice node can be handled.
//
// The node is rejected when:
//   - the shape of input 0 cannot be obtained,
//   - input 0 has more than 4 dimensions,
//   - any dimension of input 0 is 0 (treated as a dynamic shape),
//   - "starts" (input 1) or "ends" (input 2) is not an initializer,
//   - "axes" (input 3) or "steps" (input 4) is present but not an initializer.
//
// Returns true only when none of the above applies.
bool SliceOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                                              const OpSupportCheckParams& /* params */) const {
  const auto& input_defs = node.InputDefs();

  Shape input_shape;
  if (!GetShape(*input_defs[0], input_shape)) {
    return false;
  }

  const auto input_rank = input_shape.size();
  if (input_rank > 4) {
    LOGS_DEFAULT(VERBOSE) << "Slice only supports 1-4d shape, input is "
                          << input_rank << "d shape";
    return false;
  }

  // A dimension of size 0 is reported as a dynamic input shape, which is not supported.
  const bool has_zero_dim = std::any_of(input_shape.cbegin(), input_shape.cend(),
                                        [](int32_t dim) { return dim == 0; });
  if (has_zero_dim) {
    LOGS_DEFAULT(VERBOSE) << "Slice doesn't support dynamic input shape";
    return false;
  }

  // starts and ends are checked unconditionally, in that order.
  if (!CheckIsInitializer(initializers, node, 1, "starts") ||
      !CheckIsInitializer(initializers, node, 2, "ends")) {
    return false;
  }

  // axes and steps are only checked when the node actually carries those inputs.
  const auto input_count = input_defs.size();
  if (input_count > 3 && !CheckIsInitializer(initializers, node, 3, "axes")) {
    return false;
  }
  if (input_count > 4 && !CheckIsInitializer(initializers, node, 4, "steps")) {
    return false;
  }

  return true;
}
#pragma endregion
#pragma region CreateGetOpSupportCheckers
// The reason we use macros to create OpSupportCheckers is for easy exclusion in build if certain op(s) are not used
@ -1657,26 +1729,43 @@ class EluOpSupportChecker : public BaseOpSupportChecker {
static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
OpSupportCheckerRegistrations op_registrations;
{
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker);
}
// Support checkers handle a single op
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Slice", SliceOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker);
// Identity is always supported, we use BaseOpSupportChecker as default
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker);
// Relu is always supported, we use BaseOpSupportChecker as default
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Relu", BaseOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker);
// Support Checkers shared among similar ops
{
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker);
}
{
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalAveragePool", PoolOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalMaxPool", PoolOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MaxPool", PoolOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAveragePool", PoolOpSupportChecker);
}
@ -1686,12 +1775,6 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearConv", ConvOpSupportChecker);
}
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker);
// Identity is always supported, we use BaseOpSupportChecker as default
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker);
{
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Gemm", GemmOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MatMul", GemmOpSupportChecker);
@ -1703,30 +1786,19 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Exp", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Floor", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Log", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Neg", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sin", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sqrt", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Tanh", UnaryOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker);
}
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker);
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker);
{
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Max", MinMaxOpSupportChecker);
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker);
}
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker);
return op_registrations;
}

View file

@ -19,8 +19,18 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,
const std::vector<int64_t>& output_dims,
const std::vector<T>& output_vals,
bool v10_only = false) {
// V1-9
ORT_UNUSED_PARAMETER(steps);
std::unordered_set<std::string> excluded_providers;
if (!v10_only)
excluded_providers = {kTensorrtExecutionProvider, kOpenVINOExecutionProvider};
else
excluded_providers = {kTensorrtExecutionProvider};
// NNAPI EP does not support empty output
if (std::any_of(output_dims.cbegin(), output_dims.cend(), [](int64_t i) { return i == 0; })) {
excluded_providers.insert(kNnapiExecutionProvider);
}
if (!v10_only) {
OpTester testv9("Slice", 9);
testv9.AddAttribute("starts", starts);
@ -29,20 +39,27 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,
testv9.AddAttribute("axes", axes);
testv9.AddInput<T>("data", input_dims, input_vals);
testv9.AddOutput<T>("output", output_dims, output_vals);
testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled temporarily
testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); // OpenVINO EP: Disabled temporarily
}
// V10
OpTester testv10("Slice", 10);
testv10.AddInput<T>("data", input_dims, input_vals);
testv10.AddInput<int64_t>("starts", {static_cast<int64_t>(starts.size())}, starts);
testv10.AddInput<int64_t>("ends", {static_cast<int64_t>(ends.size())}, ends);
if (axes.size() != 0)
testv10.AddInput<int64_t>("axes", {static_cast<int64_t>(axes.size())}, axes);
if (steps.size() != 0)
testv10.AddInput<int64_t>("steps", {static_cast<int64_t>(steps.size())}, steps);
testv10.AddOutput<T>("output", output_dims, output_vals);
testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
auto run_test = [&](bool only_data_not_initializer) {
OpTester testv10("Slice", 10);
testv10.AddInput<T>("data", input_dims, input_vals);
testv10.AddInput<int64_t>("starts", {static_cast<int64_t>(starts.size())}, starts, only_data_not_initializer);
testv10.AddInput<int64_t>("ends", {static_cast<int64_t>(ends.size())}, ends, only_data_not_initializer);
if (axes.size() != 0)
testv10.AddInput<int64_t>("axes", {static_cast<int64_t>(axes.size())}, axes, only_data_not_initializer);
if (steps.size() != 0)
testv10.AddInput<int64_t>("steps", {static_cast<int64_t>(steps.size())}, steps, only_data_not_initializer);
testv10.AddOutput<T>("output", output_dims, output_vals);
testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers);
};
run_test(false);
// NNAPI EP requires the starts/ends/axes/steps be initializers
run_test(true);
}
// Slice V1-9 & Slice V10 can both run the following tests