mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-30 23:18:20 +00:00
[NNAPI EP] Enable Slice support (#8031)
* Enable slice for NNAPI EP * Add ANEURALNETWORKS_STRIDED_SLICE support * Addressed CR comments * Addressed CR comments, rename PrepareForCompute to PrepareForComputeHelper to avoid confusion
This commit is contained in:
parent
96989b83ee
commit
d83f7fd4aa
9 changed files with 543 additions and 225 deletions
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "core/framework/element_type_lists.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/cpu/tensor/slice_helper.h"
|
||||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "core/providers/op_kernel_type_control.h"
|
||||
#include "core/providers/op_kernel_type_control_utils.h"
|
||||
|
|
@ -44,14 +45,6 @@ const auto data_type_constraints = BuildKernelDefConstraintsFromTypeList<DataTyp
|
|||
const auto indices_type_constraints = BuildKernelDefConstraintsFromTypeList<IndicesTypes>();
|
||||
const auto enabled_data_type_constraints = BuildKernelDefConstraintsFromTypeList<EnabledDataTypes>();
|
||||
const auto enabled_indices_type_constraints = BuildKernelDefConstraintsFromTypeList<EnabledIndicesTypes>();
|
||||
|
||||
// Local stand-in for std::clamp, which is only available from C++17 onwards.
// Yields lo when v is below lo, otherwise hi when v is above hi, otherwise v itself.
// The lower bound is tested first. The result is a reference to one of the arguments.
template <typename T>
const T& clamp(const T& v, const T& lo, const T& hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
|
||||
} // namespace
|
||||
|
||||
ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
|
||||
|
|
@ -93,6 +86,7 @@ ONNX_CPU_OPERATOR_KERNEL(
|
|||
static void FlattenOutputDims(const std::vector<int64_t>& input_dimensions,
|
||||
const std::vector<int64_t>& output_dims,
|
||||
std::vector<int64_t>& starts,
|
||||
std::vector<int64_t>& ends,
|
||||
std::vector<int64_t>& steps,
|
||||
std::vector<int64_t>*& flattened_output_dims) {
|
||||
int num_to_combine = 0;
|
||||
|
|
@ -120,6 +114,10 @@ static void FlattenOutputDims(const std::vector<int64_t>& input_dimensions,
|
|||
// so we can just shrink via resize so the number of entries matches flattened_output_dims
|
||||
starts.resize(num_dims);
|
||||
steps.resize(num_dims);
|
||||
|
||||
// update ends as well
|
||||
ends.resize(num_dims);
|
||||
ends.back() = dim_value;
|
||||
} else {
|
||||
flattened_output_dims = nullptr;
|
||||
}
|
||||
|
|
@ -130,47 +128,9 @@ Status SliceBase::PrepareForCompute(const std::vector<int64_t>& raw_starts,
|
|||
const std::vector<int64_t>& raw_ends,
|
||||
const std::vector<int64_t>& raw_axes,
|
||||
SliceOp::PrepareForComputeMetadata& compute_metadata) {
|
||||
// Initialize axes to the provided axes attribute or to the default sequence
|
||||
std::vector<int64_t> axes(raw_axes);
|
||||
if (axes.empty()) {
|
||||
//axes are omitted, they are set to[0, ..., ndim - 1]
|
||||
axes.resize(compute_metadata.starts_.size());
|
||||
std::iota(axes.begin(), axes.end(), 0);
|
||||
}
|
||||
|
||||
// Iterate through the provided axes and override the start/end ranges
|
||||
std::unordered_set<int64_t> unique_axes;
|
||||
const auto& dimension_count = compute_metadata.input_dimensions_.size();
|
||||
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
|
||||
auto axis = HandleNegativeAxis(axes[axis_index], dimension_count); // handle negative and enforce axis is valid
|
||||
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
|
||||
if (unique_axes.find(axis) != unique_axes.end())
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates");
|
||||
unique_axes.insert(axis);
|
||||
|
||||
// process start
|
||||
auto start = raw_starts[axis_index];
|
||||
if (start < 0)
|
||||
start += compute_metadata.input_dimensions_[axis];
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
|
||||
// process end
|
||||
auto end = raw_ends[axis_index];
|
||||
if (end < 0)
|
||||
end += compute_metadata.input_dimensions_[axis];
|
||||
|
||||
// find output dim value for this axis
|
||||
auto temp = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]) - compute_metadata.starts_[axis];
|
||||
if (temp < 0)
|
||||
compute_metadata.output_dims_[axis] = 0;
|
||||
else
|
||||
compute_metadata.output_dims_[axis] = temp;
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, compute_metadata));
|
||||
FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
|
||||
compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
|
||||
|
||||
compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
@ -180,70 +140,9 @@ Status SliceBase::PrepareForCompute(const std::vector<int64_t>& raw_starts,
|
|||
const std::vector<int64_t>& raw_axes,
|
||||
const std::vector<int64_t>& raw_steps,
|
||||
SliceOp::PrepareForComputeMetadata& compute_metadata) {
|
||||
// Initialize axes to the provided axes attribute or to the default sequence
|
||||
std::vector<int64_t> axes(raw_axes);
|
||||
|
||||
if (axes.empty()) {
|
||||
// axes are omitted, they are set to[0, ..., ndim - 1]
|
||||
axes.resize(compute_metadata.starts_.size());
|
||||
std::iota(axes.begin(), axes.end(), 0);
|
||||
}
|
||||
|
||||
// Iterate through the provided axes and override the start/end/steps ranges
|
||||
std::unordered_set<int64_t> unique_axes;
|
||||
const auto& dimension_count = compute_metadata.input_dimensions_.size();
|
||||
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
|
||||
auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast<int64_t>(dimension_count) : axes[axis_index];
|
||||
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
|
||||
if (unique_axes.find(axis) != unique_axes.end())
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates");
|
||||
unique_axes.insert(axis);
|
||||
|
||||
// process step
|
||||
auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1;
|
||||
if (step == 0)
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'step' value cannot be 0");
|
||||
compute_metadata.steps_[axis] = step;
|
||||
|
||||
// process start
|
||||
auto start = raw_starts[axis_index];
|
||||
if (start < 0)
|
||||
start += compute_metadata.input_dimensions_[axis];
|
||||
if (step < 0)
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis] - 1);
|
||||
else
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
|
||||
// process end
|
||||
auto end = raw_ends[axis_index];
|
||||
// INT_MAX has a special meaning for end according to spec
|
||||
// equivalent to 'None' in numpy
|
||||
// it represent slicing to the end of the dimension
|
||||
if (end == std::numeric_limits<int32_t>::max() ||
|
||||
end == std::numeric_limits<int64_t>::max()) {
|
||||
end = step < 0 ? -1 : compute_metadata.input_dimensions_[axis];
|
||||
}
|
||||
|
||||
else {
|
||||
if (end < 0)
|
||||
end += compute_metadata.input_dimensions_[axis];
|
||||
if (step < 0)
|
||||
end = clamp(end, int64_t{-1}, compute_metadata.input_dimensions_[axis]);
|
||||
else
|
||||
end = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
}
|
||||
|
||||
// find output dim value for this axis
|
||||
auto temp = static_cast<int64_t>(ceil(1.0 * (end - compute_metadata.starts_[axis]) / step));
|
||||
if (temp < 0)
|
||||
compute_metadata.output_dims_[axis] = 0;
|
||||
else
|
||||
compute_metadata.output_dims_[axis] = temp;
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata));
|
||||
FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
|
||||
compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
|
||||
compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#pragma once
|
||||
|
||||
#ifndef SHARED_PROVIDER
|
||||
#include "core/common/common.h"
|
||||
|
|
@ -7,28 +8,10 @@
|
|||
#include "core/util/math_cpuonly.h"
|
||||
#endif
|
||||
|
||||
#include "core/providers/cpu/tensor/slice_compute_metadata.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace SliceOp {
|
||||
struct PrepareForComputeMetadata {
|
||||
PrepareForComputeMetadata() = delete;
|
||||
PrepareForComputeMetadata(const std::vector<int64_t>& input_dimensions)
|
||||
: input_dimensions_(input_dimensions) {
|
||||
size_t dimension_count = input_dimensions.size();
|
||||
starts_.resize(dimension_count, 0);
|
||||
steps_.resize(dimension_count, 1);
|
||||
output_dims_ = input_dimensions;
|
||||
}
|
||||
|
||||
const std::vector<int64_t>& input_dimensions_;
|
||||
std::vector<int64_t> starts_;
|
||||
std::vector<int64_t> steps_;
|
||||
std::vector<int64_t> output_dims_;
|
||||
std::vector<int64_t> flattened_output_dims_;
|
||||
std::vector<int64_t>* p_flattened_output_dims_ = &flattened_output_dims_;
|
||||
};
|
||||
} // namespace SliceOp
|
||||
|
||||
class SliceBase {
|
||||
// static methods that can be used from other ops if needed
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// This file contains the definition of the PrepareForComputeMetadata for Slice operator
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace SliceOp {
|
||||
struct PrepareForComputeMetadata {
|
||||
explicit PrepareForComputeMetadata(const std::vector<int64_t>& input_dimensions)
|
||||
: input_dimensions_(input_dimensions),
|
||||
ends_(input_dimensions),
|
||||
output_dims_(input_dimensions) {
|
||||
size_t dimension_count = input_dimensions.size();
|
||||
starts_.resize(dimension_count, 0);
|
||||
steps_.resize(dimension_count, 1);
|
||||
}
|
||||
|
||||
const std::vector<int64_t>& input_dimensions_;
|
||||
std::vector<int64_t> starts_;
|
||||
std::vector<int64_t> ends_;
|
||||
std::vector<int64_t> steps_;
|
||||
std::vector<int64_t> output_dims_;
|
||||
std::vector<int64_t> flattened_output_dims_;
|
||||
std::vector<int64_t>* p_flattened_output_dims_ = &flattened_output_dims_;
|
||||
};
|
||||
|
||||
} // namespace SliceOp
|
||||
} // namespace onnxruntime
|
||||
141
onnxruntime/core/providers/cpu/tensor/slice_helper.h
Normal file
141
onnxruntime/core/providers/cpu/tensor/slice_helper.h
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// This file contains the functions that compute the starts, steps (strides) and output shape
|
||||
// for Slice op, which can be called from other ops or EPs.
|
||||
#pragma once
|
||||
#include "core/providers/cpu/tensor/slice_compute_metadata.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
// Local stand-in for std::clamp, which is only available from C++17 onwards.
// Yields lo when v is below lo, otherwise hi when v is above hi, otherwise v itself.
// The lower bound is tested first. The result is a reference to one of the arguments.
template <typename T>
const T& clamp(const T& v, const T& lo, const T& hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
|
||||
|
||||
namespace SliceOp {
|
||||
// compute output_dims without steps (Slice V1-9 & DynamicSlice)
|
||||
// Please note this will not Flatten the output shape
|
||||
inline Status PrepareForComputeHelper(const std::vector<int64_t>& raw_starts,
|
||||
const std::vector<int64_t>& raw_ends,
|
||||
const std::vector<int64_t>& raw_axes,
|
||||
SliceOp::PrepareForComputeMetadata& compute_metadata) {
|
||||
// Initialize axes to the provided axes attribute or to the default sequence
|
||||
std::vector<int64_t> axes(raw_axes);
|
||||
if (axes.empty()) {
|
||||
//axes are omitted, they are set to[0, ..., ndim - 1]
|
||||
axes.resize(compute_metadata.starts_.size());
|
||||
std::iota(axes.begin(), axes.end(), 0);
|
||||
}
|
||||
|
||||
// Iterate through the provided axes and override the start/end ranges
|
||||
std::unordered_set<int64_t> unique_axes;
|
||||
const auto& dimension_count = compute_metadata.input_dimensions_.size();
|
||||
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
|
||||
auto axis = HandleNegativeAxis(axes[axis_index], dimension_count); // handle negative and enforce axis is valid
|
||||
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
|
||||
if (unique_axes.find(axis) != unique_axes.end())
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates");
|
||||
unique_axes.insert(axis);
|
||||
|
||||
// process start
|
||||
auto start = raw_starts[axis_index];
|
||||
if (start < 0)
|
||||
start += compute_metadata.input_dimensions_[axis];
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
|
||||
// process end
|
||||
auto end = raw_ends[axis_index];
|
||||
if (end < 0)
|
||||
end += compute_metadata.input_dimensions_[axis];
|
||||
compute_metadata.ends_[axis] = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
|
||||
// find output dim value for this axis
|
||||
auto temp = compute_metadata.ends_[axis] - compute_metadata.starts_[axis];
|
||||
if (temp < 0)
|
||||
compute_metadata.output_dims_[axis] = 0;
|
||||
else
|
||||
compute_metadata.output_dims_[axis] = temp;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// compute output_dims with steps (Slice V10)
|
||||
// Please note this will not Flatten the output shape
|
||||
inline Status PrepareForComputeHelper(const std::vector<int64_t>& raw_starts,
|
||||
const std::vector<int64_t>& raw_ends,
|
||||
const std::vector<int64_t>& raw_axes,
|
||||
const std::vector<int64_t>& raw_steps,
|
||||
SliceOp::PrepareForComputeMetadata& compute_metadata) {
|
||||
// Initialize axes to the provided axes attribute or to the default sequence
|
||||
std::vector<int64_t> axes(raw_axes);
|
||||
|
||||
if (axes.empty()) {
|
||||
// axes are omitted, they are set to[0, ..., ndim - 1]
|
||||
axes.resize(compute_metadata.starts_.size());
|
||||
std::iota(axes.begin(), axes.end(), 0);
|
||||
}
|
||||
|
||||
// Iterate through the provided axes and override the start/end/steps ranges
|
||||
std::unordered_set<int64_t> unique_axes;
|
||||
const auto& dimension_count = compute_metadata.input_dimensions_.size();
|
||||
for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) {
|
||||
auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast<int64_t>(dimension_count) : axes[axis_index];
|
||||
if (axis >= static_cast<int64_t>(dimension_count) || axis < 0)
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count");
|
||||
if (unique_axes.find(axis) != unique_axes.end())
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates");
|
||||
unique_axes.insert(axis);
|
||||
|
||||
// process step
|
||||
auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1;
|
||||
if (step == 0)
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'step' value cannot be 0");
|
||||
compute_metadata.steps_[axis] = step;
|
||||
|
||||
// process start
|
||||
auto start = raw_starts[axis_index];
|
||||
if (start < 0)
|
||||
start += compute_metadata.input_dimensions_[axis];
|
||||
if (step < 0)
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis] - 1);
|
||||
else
|
||||
compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
|
||||
// process end
|
||||
auto end = raw_ends[axis_index];
|
||||
// INT_MAX has a special meaning for end according to spec
|
||||
// equivalent to 'None' in numpy
|
||||
// it represent slicing to the end of the dimension
|
||||
if (end == std::numeric_limits<int32_t>::max() ||
|
||||
end == std::numeric_limits<int64_t>::max()) {
|
||||
end = step < 0 ? -1 : compute_metadata.input_dimensions_[axis];
|
||||
} else {
|
||||
if (end < 0)
|
||||
end += compute_metadata.input_dimensions_[axis];
|
||||
if (step < 0)
|
||||
end = clamp(end, int64_t{-1}, compute_metadata.input_dimensions_[axis]);
|
||||
else
|
||||
end = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]);
|
||||
}
|
||||
|
||||
compute_metadata.ends_[axis] = end;
|
||||
|
||||
// find output dim value for this axis
|
||||
auto temp = static_cast<int64_t>(ceil(1.0 * (compute_metadata.ends_[axis] - compute_metadata.starts_[axis]) / step));
|
||||
if (temp < 0)
|
||||
compute_metadata.output_dims_[axis] = 0;
|
||||
else
|
||||
compute_metadata.output_dims_[axis] = temp;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace SliceOp
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -539,8 +539,8 @@ std::string Shape2String(const std::vector<uint32_t>& shape) {
|
|||
return os.str();
|
||||
}
|
||||
|
||||
bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
|
||||
size_t input_idx, const char* input_name) {
|
||||
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
|
||||
size_t input_idx, const char* input_name) {
|
||||
if (!Contains(initializers, node.InputDefs()[input_idx]->Name())) {
|
||||
LOGS_DEFAULT(VERBOSE) << input_name << " of " << node.OpType() << " must be an initializer tensor";
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -133,8 +133,8 @@ std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_view
|
|||
std::string Shape2String(const std::vector<uint32_t>& shape);
|
||||
|
||||
// Check the given input is an initializer tensor
|
||||
bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
|
||||
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
|
||||
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
|
||||
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#include <onnx/onnx_pb.h>
|
||||
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
#include "core/providers/cpu/tensor/slice_helper.h"
|
||||
#include "helper.h"
|
||||
#include "model_builder.h"
|
||||
#include "op_builder.h"
|
||||
|
|
@ -2536,7 +2537,6 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
#pragma region op_elu
|
||||
|
||||
class EluOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
private:
|
||||
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
|
||||
};
|
||||
|
|
@ -2561,6 +2561,179 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
|
|||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region op_slice
|
||||
|
||||
class SliceOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
|
||||
|
||||
private:
|
||||
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
|
||||
};
|
||||
|
||||
// Skip everything except input0 for Slice: starts (1), ends (2) and, when present,
// axes (3) and steps (4) are registered with the model builder as initializers to skip.
void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
  const auto input_defs = node.InputDefs();

  // data, starts, ends, axes, steps
  constexpr size_t kMaxSliceInputs = 5;

  // Bound the loop by the inputs that actually exist, so a node with fewer than three
  // inputs is never indexed out of range.
  const size_t input_count = input_defs.size() < kMaxSliceInputs ? input_defs.size() : kMaxSliceInputs;
  for (size_t input_idx = 1; input_idx < input_count; ++input_idx) {
    model_builder.AddInitializerToSkip(input_defs[input_idx]->Name());
  }
}
|
||||
|
||||
// Adds an NNAPI slice operation for the given ONNX Slice node.
//
// The starts/ends/axes/steps inputs are read from initializers, normalized with
// SliceOp::PrepareForComputeHelper (which also yields the output shape), and then emitted as
// either ANEURALNETWORKS_SLICE (all steps are 1 and the feature level allows it) or
// ANEURALNETWORKS_STRIDED_SLICE.
//
// Returns a non-OK Status if an input is not an initializer, has an unsupported data type,
// fails validation in the helper, or an operand cannot be added to the model.
Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const {
  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
  const auto input_defs = node.InputDefs();
  const auto& input_shape = shaper[input_defs[0]->Name()];
  // compute_metadata keeps a reference to this vector, so it has to outlive compute_metadata
  std::vector<int64_t> input_shape_64(input_shape.cbegin(), input_shape.cend());
  SliceOp::PrepareForComputeMetadata compute_metadata(input_shape_64);

  {
    // We need to copy the data from the starts/ends/axes/steps initializers to int64 vectors
    // to be used in shared PrepareForCompute function to calculate the output shape
    // and normalize inputs, for example, input can be starts/ends/steps for certain axes,
    // PrepareForCompute can generate standard starts/ends/steps/axes for each axes
    std::vector<int64_t> input_starts;
    std::vector<int64_t> input_ends;
    std::vector<int64_t> input_axes;
    std::vector<int64_t> input_steps;

    const auto CopyInputData = [&node, &model_builder](size_t input_idx, std::vector<int64_t>& data) {
      data.clear();
      const auto input_defs = node.InputDefs();

      // This is an optional input, return empty vector
      if (input_defs.size() <= input_idx)
        return Status::OK();

      const auto& input_name = input_defs[input_idx]->Name();

      // ONNX denotes an omitted optional input that is followed by another input with an
      // empty name (e.g. steps given without axes); treat it like a missing input
      if (input_name.empty())
        return Status::OK();

      // Look the initializer up explicitly so a non-initializer input produces an error
      // Status instead of an exception thrown by at()
      const auto& initializers(model_builder.GetInitializerTensors());
      const auto initializer_it = initializers.find(input_name);
      if (initializer_it == initializers.end()) {
        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                               "Input [", input_name, "] of Slice must be an initializer tensor");
      }

      const auto& tensor = *initializer_it->second;
      std::unique_ptr<uint8_t[]> unpacked_tensor;
      size_t tensor_byte_size;
      ORT_RETURN_IF_ERROR(
          onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(),
                                                    unpacked_tensor, tensor_byte_size));
      const auto data_type = tensor.data_type();
      if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
        const int64_t* tensor_data = reinterpret_cast<const int64_t*>(unpacked_tensor.get());
        size_t size = tensor_byte_size / sizeof(int64_t);
        data.insert(data.end(), tensor_data, tensor_data + size);
      } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
        const int32_t* tensor_data = reinterpret_cast<const int32_t*>(unpacked_tensor.get());
        size_t size = tensor_byte_size / sizeof(int32_t);
        data.insert(data.end(), tensor_data, tensor_data + size);
      } else {
        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                               "Data type for starts and ends inputs' is not supported in this build. Got ",
                               data_type);
      }

      return Status::OK();
    };

    ORT_RETURN_IF_ERROR(CopyInputData(1, input_starts));
    ORT_RETURN_IF_ERROR(CopyInputData(2, input_ends));
    ORT_RETURN_IF_ERROR(CopyInputData(3, input_axes));
    ORT_RETURN_IF_ERROR(CopyInputData(4, input_steps));
    ORT_RETURN_IF_ERROR(
        SliceOp::PrepareForComputeHelper(input_starts, input_ends, input_axes, input_steps, compute_metadata));
  }

  // output shape is of type uint32_t, convert from int64 compute_metadata.output_dims_
  Shape nnapi_output_shape;
  nnapi_output_shape.reserve(compute_metadata.output_dims_.size());
  std::transform(compute_metadata.output_dims_.cbegin(), compute_metadata.output_dims_.cend(),
                 std::back_inserter(nnapi_output_shape),
                 [](int64_t i) { return SafeInt<uint32_t>(i); });

  const auto& input = node.InputDefs()[0]->Name();
  const auto& output = node.OutputDefs()[0]->Name();
  bool output_is_nhwc = model_builder.IsOperandNHWC(input);

  // No shape inference for Slice, everything is calculated here, we only need to add the output shape
  // to the shaper
  shaper.AddShape(output, nnapi_output_shape);
  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);

  std::vector<uint32_t> input_indices;
  input_indices.push_back(operand_indices.at(input));

  // begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE have the same shape
  Shape param_dimen = {static_cast<uint32_t>(input_shape.size())};

  // helper function to add begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE
  // (values are narrowed from int64 to int32 through SafeInt)
  const auto AddOperand = [&model_builder, &node, &input_indices, &operand_indices](
                              const char* name, const Shape& shape, const std::vector<int64_t>& param_raw_data) {
    std::vector<int32_t> param_data;
    param_data.reserve(param_raw_data.size());
    std::transform(param_raw_data.cbegin(), param_raw_data.cend(),
                   std::back_inserter(param_data),
                   [](int64_t i) { return SafeInt<int32_t>(i); });
    std::string param_name = model_builder.GetUniqueName(node.Name() + name);
    OperandType param_operand_type(Type::TENSOR_INT32, shape);
    ORT_RETURN_IF_ERROR(
        model_builder.AddOperandFromPersistMemoryBuffer(param_name, param_data.data(), param_operand_type));
    input_indices.push_back(operand_indices.at(param_name));
    return Status::OK();
  };

  ORT_RETURN_IF_ERROR(AddOperand("starts", param_dimen, compute_metadata.starts_));  // nnapi_begin

  // NNAPI has 2 slice operations
  // - ANEURALNETWORKS_SLICE
  //    Simpler and faster version of slice without steps, available from ANEURALNETWORKS_FEATURE_LEVEL_3
  //    Use this one if no step other than 1 is used in ONNX slice
  // - ANEURALNETWORKS_STRIDED_SLICE
  //    More comprehensive version, available from ANEURALNETWORKS_FEATURE_LEVEL_2
  int op_code = ANEURALNETWORKS_STRIDED_SLICE;
  if (std::all_of(compute_metadata.steps_.cbegin(),
                  compute_metadata.steps_.cend(),
                  [](int64_t i) { return i == 1; }) &&
      model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    op_code = ANEURALNETWORKS_SLICE;
    // the nnapi size of the slice in this case is the output shape
    ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_));  // nnapi_sizes
  } else {
    // ** The special treatment of ends **
    // The nnapi_end need some special handling, based on the current undocumented design of
    // ANEURALNETWORKS_STRIDED_SLICE
    // For ORT, for a single axis, after SliceOp::PrepareForCompute, and the step is negative,
    // and the last element for slice is at the beginning of the axis (we are slicing backwards)
    // The end for this axis will be -1
    // For NNAPI, it is not documented that end can be negative,
    // see https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a89695302f8b1e7ae7ce8f4d8c0b8a752
    // However, the actual NNAPI StridedSlice has some odd implementations,
    // See https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/operations/StridedSlice.cpp#177
    // and, https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/include/OperationsUtils.h#262
    // If a negative end is no less than -dim (dimension of the axis), it will be treated as an index counting from
    // the end, for example, dim = 5, and end = -1, the end will be normalized to 4, which will cause
    // incorrect result, so here we have to make the end = -dim - 1 such that it will not be treated as
    // an index counting from the end.
    std::vector<int64_t> ends = compute_metadata.ends_;
    for (size_t i = 0; i < ends.size(); ++i) {
      if (ends[i] == -1) {
        // computed in int64 so the SafeInt<int32_t> narrowing in AddOperand validates the range
        ends[i] = -(static_cast<int64_t>(input_shape[i]) + 1);
      }
    }
    ORT_RETURN_IF_ERROR(AddOperand("ends", param_dimen, ends));                       // nnapi_end
    ORT_RETURN_IF_ERROR(AddOperand("steps", param_dimen, compute_metadata.steps_));  // nnapi_strides
    // We do not use the following inputs in ANEURALNETWORKS_STRIDED_SLICE, set them all to 0
    ADD_SCALAR_OPERAND(model_builder, input_indices, 0);  // begin_mask
    ADD_SCALAR_OPERAND(model_builder, input_indices, 0);  // end_mask
    ADD_SCALAR_OPERAND(model_builder, input_indices, 0);  // shrink_axis_mask
  }
  return model_builder.AddOperation(op_code, input_indices, {output}, {output_operand_type}, {output_is_nhwc});
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region CreateGetOpBuilders
|
||||
|
||||
// The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used
|
||||
|
|
@ -2579,24 +2752,39 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
|
|||
static OpBuilderRegistrations CreateOpBuilderRegistrations() {
|
||||
OpBuilderRegistrations op_registrations;
|
||||
|
||||
// Builders handle a single op
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Slice", SliceOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder);
|
||||
|
||||
// Builders shared among similar ops
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Add", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Div", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Pow", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder);
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalAveragePool", PoolOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalMaxPool", PoolOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("MaxPool", PoolOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAveragePool", PoolOpBuilder);
|
||||
}
|
||||
|
|
@ -2606,10 +2794,6 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearConv", ConvOpBuilder);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder);
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Gemm", GemmOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("MatMul", GemmOpBuilder);
|
||||
|
|
@ -2621,30 +2805,19 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Exp", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Floor", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Log", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Neg", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sin", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sqrt", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Tanh", UnaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder);
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder);
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Max", MinMaxOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder);
|
||||
|
||||
return op_registrations;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -76,7 +76,8 @@ class BaseOpSupportChecker : public IOpSupportChecker {
|
|||
return true;
|
||||
}
|
||||
|
||||
virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const {
|
||||
virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
// ANEURALNETWORKS_FEATURE_LEVEL_1 is the baseline version of NNAPI,
|
||||
// There is no NNAPI support for Android API level 26-
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_1;
|
||||
|
|
@ -319,7 +320,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
|
|
@ -490,7 +492,8 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& params) const override {
|
||||
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
|
|
@ -667,7 +670,8 @@ class ConvOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& params) const override {
|
||||
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
|
|
@ -775,7 +779,8 @@ class CastOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_3;
|
||||
}
|
||||
|
||||
|
|
@ -805,7 +810,8 @@ class SoftMaxOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
};
|
||||
|
|
@ -1043,7 +1049,8 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& params) const override;
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
bool HasSupportedInputsImpl(const Node& node) const override;
|
||||
|
||||
|
|
@ -1079,8 +1086,8 @@ bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initia
|
|||
return true;
|
||||
}
|
||||
|
||||
int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(
|
||||
const Node& node, const OpSupportCheckParams& /* params */) const {
|
||||
int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
const auto& op(node.OpType());
|
||||
if (op == "Abs" ||
|
||||
op == "Exp" ||
|
||||
|
|
@ -1216,7 +1223,8 @@ class SqueezeOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
};
|
||||
|
|
@ -1255,7 +1263,8 @@ class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_3;
|
||||
}
|
||||
};
|
||||
|
|
@ -1296,7 +1305,8 @@ class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_1;
|
||||
}
|
||||
bool HasSupportedInputsImpl(const Node& node) const override;
|
||||
|
|
@ -1340,7 +1350,8 @@ class LRNOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
};
|
||||
|
|
@ -1397,7 +1408,8 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override;
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
|
||||
// Resize opset 10- is very different than Resize opset 11+, with many key attributes missing
|
||||
// We only support Resize opset 11+ here
|
||||
|
|
@ -1516,7 +1528,8 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
|
|||
return true;
|
||||
}
|
||||
|
||||
int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& /* params */) const {
|
||||
int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
int32_t input_type;
|
||||
|
||||
// This should not happen, but if it happens make sure this will require an impossible version
|
||||
|
|
@ -1590,7 +1603,8 @@ class MinMaxOpSupportChecker : public BaseOpSupportChecker {
|
|||
const std::string& op_type, OpSupportCheckerRegistrations& op_registrations);
|
||||
|
||||
private:
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_3;
|
||||
}
|
||||
|
||||
|
|
@ -1629,7 +1643,8 @@ bool MinMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in
|
|||
|
||||
class EluOpSupportChecker : public BaseOpSupportChecker {
|
||||
private:
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_4;
|
||||
}
|
||||
|
||||
|
|
@ -1639,6 +1654,63 @@ class EluOpSupportChecker : public BaseOpSupportChecker {
|
|||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region op_slice
|
||||
|
||||
class SliceOpSupportChecker : public BaseOpSupportChecker {
|
||||
private:
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
// We only support slice from opset 10
|
||||
int GetMinSupportedOpSet(const Node& /* node */) const override { return 10; }
|
||||
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
};
|
||||
|
||||
bool SliceOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
Shape input_shape;
|
||||
if (!GetShape(*node.InputDefs()[0], input_shape))
|
||||
return false;
|
||||
|
||||
if (input_shape.size() > 4) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Slice only supports 1-4d shape, input is "
|
||||
<< input_shape.size() << "d shape";
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO, replace with std::find when we switch to c++17
|
||||
if (std::any_of(input_shape.cbegin(), input_shape.cend(), [](int32_t i) { return i == 0; })) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Slice doesn't support dynamic input shape";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CheckIsInitializer(initializers, node, 1, "starts")) {
|
||||
return false;
|
||||
}
|
||||
if (!CheckIsInitializer(initializers, node, 2, "ends")) {
|
||||
return false;
|
||||
}
|
||||
const auto& input_defs = node.InputDefs();
|
||||
if (input_defs.size() > 3) {
|
||||
if (!CheckIsInitializer(initializers, node, 3, "axes")) {
|
||||
return false;
|
||||
}
|
||||
if (input_defs.size() > 4) {
|
||||
if (!CheckIsInitializer(initializers, node, 4, "steps")) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region CreateGetOpSupportCheckers
|
||||
|
||||
// The reason we use macros to create OpSupportCheckers is for easy exclusion in build if certain op(s) are not used
|
||||
|
|
@ -1657,26 +1729,43 @@ class EluOpSupportChecker : public BaseOpSupportChecker {
|
|||
static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
|
||||
OpSupportCheckerRegistrations op_registrations;
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker);
|
||||
}
|
||||
// Support checkers handle a single op
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Slice", SliceOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker);
|
||||
|
||||
// Identity is always supported, we use BaseOpSupportChecker as default
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker);
|
||||
|
||||
// Relu is always supported, we use BaseOpSupportChecker as default
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Relu", BaseOpSupportChecker);
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker);
|
||||
// Support Checkers shared among similar ops
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker);
|
||||
}
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalAveragePool", PoolOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalMaxPool", PoolOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MaxPool", PoolOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAveragePool", PoolOpSupportChecker);
|
||||
}
|
||||
|
|
@ -1686,12 +1775,6 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearConv", ConvOpSupportChecker);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker);
|
||||
|
||||
// Identity is always supported, we use BaseOpSupportChecker as default
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker);
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Gemm", GemmOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MatMul", GemmOpSupportChecker);
|
||||
|
|
@ -1703,30 +1786,19 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Exp", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Floor", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Log", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Neg", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sin", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sqrt", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Tanh", UnaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker);
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker);
|
||||
|
||||
{
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Max", MinMaxOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker);
|
||||
}
|
||||
|
||||
NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker);
|
||||
|
||||
return op_registrations;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,18 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,
|
|||
const std::vector<int64_t>& output_dims,
|
||||
const std::vector<T>& output_vals,
|
||||
bool v10_only = false) {
|
||||
// V1-9
|
||||
ORT_UNUSED_PARAMETER(steps);
|
||||
std::unordered_set<std::string> excluded_providers;
|
||||
|
||||
if (!v10_only)
|
||||
excluded_providers = {kTensorrtExecutionProvider, kOpenVINOExecutionProvider};
|
||||
else
|
||||
excluded_providers = {kTensorrtExecutionProvider};
|
||||
|
||||
// NNAPI EP does not support empty output
|
||||
if (std::any_of(output_dims.cbegin(), output_dims.cend(), [](int64_t i) { return i == 0; })) {
|
||||
excluded_providers.insert(kNnapiExecutionProvider);
|
||||
}
|
||||
|
||||
if (!v10_only) {
|
||||
OpTester testv9("Slice", 9);
|
||||
testv9.AddAttribute("starts", starts);
|
||||
|
|
@ -29,20 +39,27 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,
|
|||
testv9.AddAttribute("axes", axes);
|
||||
testv9.AddInput<T>("data", input_dims, input_vals);
|
||||
testv9.AddOutput<T>("output", output_dims, output_vals);
|
||||
testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled temporarily
|
||||
testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); // OpenVINO EP: Disabled temporarily
|
||||
}
|
||||
|
||||
// V10
|
||||
OpTester testv10("Slice", 10);
|
||||
testv10.AddInput<T>("data", input_dims, input_vals);
|
||||
testv10.AddInput<int64_t>("starts", {static_cast<int64_t>(starts.size())}, starts);
|
||||
testv10.AddInput<int64_t>("ends", {static_cast<int64_t>(ends.size())}, ends);
|
||||
if (axes.size() != 0)
|
||||
testv10.AddInput<int64_t>("axes", {static_cast<int64_t>(axes.size())}, axes);
|
||||
if (steps.size() != 0)
|
||||
testv10.AddInput<int64_t>("steps", {static_cast<int64_t>(steps.size())}, steps);
|
||||
testv10.AddOutput<T>("output", output_dims, output_vals);
|
||||
testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
auto run_test = [&](bool only_data_not_initializer) {
|
||||
OpTester testv10("Slice", 10);
|
||||
testv10.AddInput<T>("data", input_dims, input_vals);
|
||||
testv10.AddInput<int64_t>("starts", {static_cast<int64_t>(starts.size())}, starts, only_data_not_initializer);
|
||||
testv10.AddInput<int64_t>("ends", {static_cast<int64_t>(ends.size())}, ends, only_data_not_initializer);
|
||||
if (axes.size() != 0)
|
||||
testv10.AddInput<int64_t>("axes", {static_cast<int64_t>(axes.size())}, axes, only_data_not_initializer);
|
||||
if (steps.size() != 0)
|
||||
testv10.AddInput<int64_t>("steps", {static_cast<int64_t>(steps.size())}, steps, only_data_not_initializer);
|
||||
testv10.AddOutput<T>("output", output_dims, output_vals);
|
||||
testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers);
|
||||
};
|
||||
|
||||
run_test(false);
|
||||
|
||||
// NNAPI EP requires the starts/ends/axes/steps be initializers
|
||||
run_test(true);
|
||||
}
|
||||
|
||||
// Slice V1-9 & Slice V10 can both run the following tests
|
||||
|
|
|
|||
Loading…
Reference in a new issue