From d83f7fd4aa176f56d9093e19a1db13399aac0c8a Mon Sep 17 00:00:00 2001 From: Guoyu Wang <62914304+gwang-msft@users.noreply.github.com> Date: Thu, 17 Jun 2021 12:36:12 -0700 Subject: [PATCH] [NNAPI EP] Enable Slice support (#8031) * Enable slice for NNAPI EP * Add ANEURALNETWORKS_STRIDED_SLICE support * Addressed CR comments * Addressed CR comments, rename PrepareForCompute to PrepareForComputeHelper to avoid confusion --- .../core/providers/cpu/tensor/slice.cc | 121 +-------- onnxruntime/core/providers/cpu/tensor/slice.h | 23 +- .../cpu/tensor/slice_compute_metadata.h | 33 +++ .../core/providers/cpu/tensor/slice_helper.h | 141 +++++++++++ .../nnapi/nnapi_builtin/builders/helper.cc | 4 +- .../nnapi/nnapi_builtin/builders/helper.h | 4 +- .../nnapi_builtin/builders/op_builder.cc | 229 +++++++++++++++--- .../builders/op_support_checker.cc | 170 +++++++++---- .../providers/cpu/tensor/slice_op.test.cc | 43 +++- 9 files changed, 543 insertions(+), 225 deletions(-) create mode 100644 onnxruntime/core/providers/cpu/tensor/slice_compute_metadata.h create mode 100644 onnxruntime/core/providers/cpu/tensor/slice_helper.h diff --git a/onnxruntime/core/providers/cpu/tensor/slice.cc b/onnxruntime/core/providers/cpu/tensor/slice.cc index 0557598732..e8b1fa5d6a 100644 --- a/onnxruntime/core/providers/cpu/tensor/slice.cc +++ b/onnxruntime/core/providers/cpu/tensor/slice.cc @@ -8,6 +8,7 @@ #include "core/framework/element_type_lists.h" #include "core/providers/common.h" +#include "core/providers/cpu/tensor/slice_helper.h" #include "core/providers/cpu/tensor/utils.h" #include "core/providers/op_kernel_type_control.h" #include "core/providers/op_kernel_type_control_utils.h" @@ -44,14 +45,6 @@ const auto data_type_constraints = BuildKernelDefConstraintsFromTypeList(); const auto enabled_data_type_constraints = BuildKernelDefConstraintsFromTypeList(); const auto enabled_indices_type_constraints = BuildKernelDefConstraintsFromTypeList(); - -// std::clamp doesn't exist until C++17 so create a local version -template -const T& clamp(const T& v, const T& lo, const T& hi) { - if (v < lo) return lo; - if (v > hi) return hi; - return v; -} } // namespace ONNX_CPU_OPERATOR_VERSIONED_KERNEL( @@ -93,6 +86,7 @@ ONNX_CPU_OPERATOR_KERNEL( static void FlattenOutputDims(const std::vector& input_dimensions, const std::vector& output_dims, std::vector& starts, + std::vector& ends, std::vector& steps, std::vector*& flattened_output_dims) { int num_to_combine = 0; @@ -120,6 +114,10 @@ static void FlattenOutputDims(const std::vector& input_dimensions, // so we can just shrink via resize so the number of entries matches flattened_output_dims starts.resize(num_dims); steps.resize(num_dims); + + // update ends as well + ends.resize(num_dims); + ends.back() = dim_value; } else { flattened_output_dims = nullptr; } @@ -130,47 +128,9 @@ Status SliceBase::PrepareForCompute(const std::vector& raw_starts, const std::vector& raw_ends, const std::vector& raw_axes, SliceOp::PrepareForComputeMetadata& compute_metadata) { - // Initialize axes to the provided axes attribute or to the default sequence - std::vector axes(raw_axes); - if (axes.empty()) { - //axes are omitted, they are set to[0, ..., ndim - 1] - axes.resize(compute_metadata.starts_.size()); - std::iota(axes.begin(), axes.end(), 0); - } - - // Iterate through the provided axes and override the start/end ranges - std::unordered_set unique_axes; - const auto& dimension_count = compute_metadata.input_dimensions_.size(); - for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) { - auto axis = HandleNegativeAxis(axes[axis_index], dimension_count); // handle negative and enforce axis is valid - if (axis >= static_cast(dimension_count) || axis < 0) - return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count"); - if (unique_axes.find(axis) != unique_axes.end()) - return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates"); - unique_axes.insert(axis); - - // process start - auto start = raw_starts[axis_index]; - if (start < 0) - start += compute_metadata.input_dimensions_[axis]; - compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]); - - // process end - auto end = raw_ends[axis_index]; - if (end < 0) - end += compute_metadata.input_dimensions_[axis]; - - // find output dim value for this axis - auto temp = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]) - compute_metadata.starts_[axis]; - if (temp < 0) - compute_metadata.output_dims_[axis] = 0; - else - compute_metadata.output_dims_[axis] = temp; - } - + ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, compute_metadata)); FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_, - compute_metadata.steps_, compute_metadata.p_flattened_output_dims_); - + compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_); return Status::OK(); } @@ -180,70 +140,9 @@ Status SliceBase::PrepareForCompute(const std::vector& raw_starts, const std::vector& raw_axes, const std::vector& raw_steps, SliceOp::PrepareForComputeMetadata& compute_metadata) { - // Initialize axes to the provided axes attribute or to the default sequence - std::vector axes(raw_axes); - - if (axes.empty()) { - // axes are omitted, they are set to[0, ..., ndim - 1] - axes.resize(compute_metadata.starts_.size()); - std::iota(axes.begin(), axes.end(), 0); - } - - // Iterate through the provided axes and override the start/end/steps ranges - std::unordered_set unique_axes; - const auto& dimension_count = compute_metadata.input_dimensions_.size(); - for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) { - auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast(dimension_count) : axes[axis_index]; - if (axis >= static_cast(dimension_count) || axis < 0) - return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count"); - if (unique_axes.find(axis) != unique_axes.end()) - return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'axes' has duplicates"); - unique_axes.insert(axis); - - // process step - auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1; - if (step == 0) - return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'step' value cannot be 0"); - compute_metadata.steps_[axis] = step; - - // process start - auto start = raw_starts[axis_index]; - if (start < 0) - start += compute_metadata.input_dimensions_[axis]; - if (step < 0) - compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis] - 1); - else - compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]); - - // process end - auto end = raw_ends[axis_index]; - // INT_MAX has a special meaning for end according to spec - // equivalent to 'None' in numpy - // it represent slicing to the end of the dimension - if (end == std::numeric_limits::max() || - end == std::numeric_limits::max()) { - end = step < 0 ? -1 : compute_metadata.input_dimensions_[axis]; - } - - else { - if (end < 0) - end += compute_metadata.input_dimensions_[axis]; - if (step < 0) - end = clamp(end, int64_t{-1}, compute_metadata.input_dimensions_[axis]); - else - end = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]); - } - - // find output dim value for this axis - auto temp = static_cast(ceil(1.0 * (end - compute_metadata.starts_[axis]) / step)); - if (temp < 0) - compute_metadata.output_dims_[axis] = 0; - else - compute_metadata.output_dims_[axis] = temp; - } - + ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_, - compute_metadata.steps_, compute_metadata.p_flattened_output_dims_); + compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_output_dims_); return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/slice.h b/onnxruntime/core/providers/cpu/tensor/slice.h index 4e52777fd1..f9549257ca 100644 --- a/onnxruntime/core/providers/cpu/tensor/slice.h +++ b/onnxruntime/core/providers/cpu/tensor/slice.h @@ -1,5 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#pragma once #ifndef SHARED_PROVIDER #include "core/common/common.h" @@ -7,28 +8,10 @@ #include "core/util/math_cpuonly.h" #endif +#include "core/providers/cpu/tensor/slice_compute_metadata.h" + namespace onnxruntime { -namespace SliceOp { -struct PrepareForComputeMetadata { - PrepareForComputeMetadata() = delete; - PrepareForComputeMetadata(const std::vector& input_dimensions) - : input_dimensions_(input_dimensions) { - size_t dimension_count = input_dimensions.size(); - starts_.resize(dimension_count, 0); - steps_.resize(dimension_count, 1); - output_dims_ = input_dimensions; - } - - const std::vector& input_dimensions_; - std::vector starts_; - std::vector steps_; - std::vector output_dims_; - std::vector flattened_output_dims_; - std::vector* p_flattened_output_dims_ = &flattened_output_dims_; -}; -} // namespace SliceOp - class SliceBase { // static methods that can be used from other ops if needed public: diff --git a/onnxruntime/core/providers/cpu/tensor/slice_compute_metadata.h b/onnxruntime/core/providers/cpu/tensor/slice_compute_metadata.h new file mode 100644 index 0000000000..0eb37124ce --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/slice_compute_metadata.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// This file contains the definition of the PrepareForComputeMetadata for Slice operator +#pragma once + +#include +#include + +namespace onnxruntime { + +namespace SliceOp { +struct PrepareForComputeMetadata { + explicit PrepareForComputeMetadata(const std::vector& input_dimensions) + : input_dimensions_(input_dimensions), + ends_(input_dimensions), + output_dims_(input_dimensions) { + size_t dimension_count = input_dimensions.size(); + starts_.resize(dimension_count, 0); + steps_.resize(dimension_count, 1); + } + + const std::vector& input_dimensions_; + std::vector starts_; + std::vector ends_; + std::vector steps_; + std::vector output_dims_; + std::vector flattened_output_dims_; + std::vector* p_flattened_output_dims_ = &flattened_output_dims_; +}; + +} // namespace SliceOp +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/slice_helper.h b/onnxruntime/core/providers/cpu/tensor/slice_helper.h new file mode 100644 index 0000000000..410c9d2c37 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/slice_helper.h @@ -0,0 +1,141 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// This file contains the functions compute the starts, steps (strides) and output shape +// for Slice op, which can be called from other ops or EPs. +#pragma once +#include "core/providers/cpu/tensor/slice_compute_metadata.h" + +namespace onnxruntime { + +// std::clamp doesn't exist until C++17 so create a local version +template +const T& clamp(const T& v, const T& lo, const T& hi) { + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} + +namespace SliceOp { +// compute output_dims without steps (Slice V1-9 & DynamicSlice) +// Please note this will not Flatten the output shape +inline Status PrepareForComputeHelper(const std::vector& raw_starts, + const std::vector& raw_ends, + const std::vector& raw_axes, + SliceOp::PrepareForComputeMetadata& compute_metadata) { + // Initialize axes to the provided axes attribute or to the default sequence + std::vector axes(raw_axes); + if (axes.empty()) { + //axes are omitted, they are set to[0, ..., ndim - 1] + axes.resize(compute_metadata.starts_.size()); + std::iota(axes.begin(), axes.end(), 0); + } + + // Iterate through the provided axes and override the start/end ranges + std::unordered_set unique_axes; + const auto& dimension_count = compute_metadata.input_dimensions_.size(); + for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) { + auto axis = HandleNegativeAxis(axes[axis_index], dimension_count); // handle negative and enforce axis is valid + if (axis >= static_cast(dimension_count) || axis < 0) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count"); + if (unique_axes.find(axis) != unique_axes.end()) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates"); + unique_axes.insert(axis); + + // process start + auto start = raw_starts[axis_index]; + if (start < 0) + start += compute_metadata.input_dimensions_[axis]; + compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]); + + // process end + auto end = raw_ends[axis_index]; + if (end < 0) + end += compute_metadata.input_dimensions_[axis]; + compute_metadata.ends_[axis] = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]); + + // find output dim value for this axis + auto temp = compute_metadata.ends_[axis] - compute_metadata.starts_[axis]; + if (temp < 0) + compute_metadata.output_dims_[axis] = 0; + else + compute_metadata.output_dims_[axis] = temp; + } + + return Status::OK(); +} + +// compute output_dims with steps (Slice V10) +// Please note this will not Flatten the output shape +inline Status PrepareForComputeHelper(const std::vector& raw_starts, + const std::vector& raw_ends, + const std::vector& raw_axes, + const std::vector& raw_steps, + SliceOp::PrepareForComputeMetadata& compute_metadata) { + // Initialize axes to the provided axes attribute or to the default sequence + std::vector axes(raw_axes); + + if (axes.empty()) { + // axes are omitted, they are set to[0, ..., ndim - 1] + axes.resize(compute_metadata.starts_.size()); + std::iota(axes.begin(), axes.end(), 0); + } + + // Iterate through the provided axes and override the start/end/steps ranges + std::unordered_set unique_axes; + const auto& dimension_count = compute_metadata.input_dimensions_.size(); + for (size_t axis_index = 0, axes_count = axes.size(); axis_index < axes_count; ++axis_index) { + auto axis = axes[axis_index] < 0 ? axes[axis_index] + static_cast(dimension_count) : axes[axis_index]; + if (axis >= static_cast(dimension_count) || axis < 0) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has an axis outside of the tensor dimension count"); + if (unique_axes.find(axis) != unique_axes.end()) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'axes' has duplicates"); + unique_axes.insert(axis); + + // process step + auto step = axis_index < raw_steps.size() ? raw_steps[axis_index] : 1; + if (step == 0) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "'step' value cannot be 0"); + compute_metadata.steps_[axis] = step; + + // process start + auto start = raw_starts[axis_index]; + if (start < 0) + start += compute_metadata.input_dimensions_[axis]; + if (step < 0) + compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis] - 1); + else + compute_metadata.starts_[axis] = clamp(start, int64_t{0}, compute_metadata.input_dimensions_[axis]); + + // process end + auto end = raw_ends[axis_index]; + // INT_MAX has a special meaning for end according to spec + // equivalent to 'None' in numpy + // it represent slicing to the end of the dimension + if (end == std::numeric_limits::max() || + end == std::numeric_limits::max()) { + end = step < 0 ? -1 : compute_metadata.input_dimensions_[axis]; + } else { + if (end < 0) + end += compute_metadata.input_dimensions_[axis]; + if (step < 0) + end = clamp(end, int64_t{-1}, compute_metadata.input_dimensions_[axis]); + else + end = clamp(end, int64_t{0}, compute_metadata.input_dimensions_[axis]); + } + + compute_metadata.ends_[axis] = end; + + // find output dim value for this axis + auto temp = static_cast(ceil(1.0 * (compute_metadata.ends_[axis] - compute_metadata.starts_[axis]) / step)); + if (temp < 0) + compute_metadata.output_dims_[axis] = 0; + else + compute_metadata.output_dims_[axis] = temp; + } + + return Status::OK(); +} + +} // namespace SliceOp +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 868d1f5d73..4646d1cb08 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -539,8 +539,8 @@ std::string Shape2String(const std::vector& shape) { return os.str(); } -bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node, - size_t input_idx, const char* input_name) { +bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node, + size_t input_idx, const char* input_name) { if (!Contains(initializers, node.InputDefs()[input_idx]->Name())) { LOGS_DEFAULT(VERBOSE) << input_name << " of " << node.OpType() << " must be an initializer tensor"; return false; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 277025ebf0..71ff61d3e7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -133,8 +133,8 @@ std::vector> GetSupportedNodes(const GraphViewer& graph_view std::string Shape2String(const std::vector& shape); // Check the given input is an initializer tensor -bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node, - size_t index, const char* input_name) ORT_MUST_USE_RESULT; +bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node, + size_t index, const char* input_name) ORT_MUST_USE_RESULT; } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 6368ee2a5b..9e640f56b3 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -8,6 +8,7 @@ #include #include "core/providers/shared/utils/utils.h" +#include "core/providers/cpu/tensor/slice_helper.h" #include "helper.h" #include "model_builder.h" #include "op_builder.h" @@ -2536,7 +2537,6 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const #pragma region op_elu class EluOpBuilder : public BaseOpBuilder { - public: private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT; }; @@ -2561,6 +2561,179 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No #pragma endregion +#pragma region op_slice + +class SliceOpBuilder : public BaseOpBuilder { + public: + void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; + + private: + Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT; +}; + +void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { + // Skip everything except input0 for Slice + const auto input_defs = node.InputDefs(); + model_builder.AddInitializerToSkip(input_defs[1]->Name()); // starts + model_builder.AddInitializerToSkip(input_defs[2]->Name()); // ends + if (input_defs.size() > 3) { + model_builder.AddInitializerToSkip(input_defs[3]->Name()); // axes + if (input_defs.size() > 4) { + model_builder.AddInitializerToSkip(input_defs[4]->Name()); // steps + } + } +} + +Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const { + auto& shaper(model_builder.GetShaper()); + const auto& operand_indices(model_builder.GetOperandIndices()); + const auto& operand_types(model_builder.GetOperandTypes()); + const auto input_defs = node.InputDefs(); + const auto& input_shape = shaper[input_defs[0]->Name()]; + std::vector input_shape_64(input_shape.cbegin(), input_shape.cend()); + SliceOp::PrepareForComputeMetadata compute_metadata(input_shape_64); + + { + // We need to copy the data from the starts/ends/axes/steps initializers to int64 vectors + // to be used in shared PrepareForCompute function to calculate the output shape + // and normalize inputs, for example, input can be starts/ends/steps for certain axes, + // PrepareForCompute can generate standard starts/ends/steps/axes for each axes + std::vector input_starts; + std::vector input_ends; + std::vector input_axes; + std::vector input_steps; + + const auto CopyInputData = [&node, &model_builder](size_t input_idx, std::vector& data) { + data.clear(); + const auto input_defs = node.InputDefs(); + + // This is an optional input, return empty vector + if (input_defs.size() <= input_idx) + return Status::OK(); + + const auto& input_name = input_defs[input_idx]->Name(); + const auto& initializers(model_builder.GetInitializerTensors()); + + const auto& tensor = *initializers.at(input_name); + std::unique_ptr unpacked_tensor; + size_t tensor_byte_size; + ORT_RETURN_IF_ERROR( + onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), + unpacked_tensor, tensor_byte_size)); + const auto data_type = tensor.data_type(); + if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.get()); + size_t size = tensor_byte_size / sizeof(int64_t); + data.insert(data.end(), tensor_data, tensor_data + size); + } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { + const int32_t* tensor_data = reinterpret_cast(unpacked_tensor.get()); + size_t size = tensor_byte_size / sizeof(int32_t); + data.insert(data.end(), tensor_data, tensor_data + size); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Data type for starts and ends inputs' is not supported in this build. Got ", + data_type); + } + + return Status::OK(); + }; + + ORT_RETURN_IF_ERROR(CopyInputData(1, input_starts)); + ORT_RETURN_IF_ERROR(CopyInputData(2, input_ends)); + ORT_RETURN_IF_ERROR(CopyInputData(3, input_axes)); + ORT_RETURN_IF_ERROR(CopyInputData(4, input_steps)); + ORT_RETURN_IF_ERROR( + SliceOp::PrepareForComputeHelper(input_starts, input_ends, input_axes, input_steps, compute_metadata)); + } + + // output shape is of type uint32_t, convert from int64 compute_metadata.output_dims_ + Shape nnapi_output_shape; + nnapi_output_shape.reserve(compute_metadata.output_dims_.size()); + std::transform(compute_metadata.output_dims_.cbegin(), compute_metadata.output_dims_.cend(), + std::back_inserter(nnapi_output_shape), + [](int64_t i) { return SafeInt(i); }); + + const auto& input = node.InputDefs()[0]->Name(); + const auto& output = node.OutputDefs()[0]->Name(); + bool output_is_nhwc = model_builder.IsOperandNHWC(input); + + // No shape inference for Slice, everything is calculated here, we only need to add the output shape + // to the shaper + shaper.AddShape(output, nnapi_output_shape); + const OperandType output_operand_type(operand_types.at(input).type, shaper[output]); + + std::vector input_indices; + input_indices.push_back(operand_indices.at(input)); + + // begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE have the same shape + Shape param_dimen = {static_cast(input_shape.size())}; + + // helper function to add begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE + const auto AddOperand = [&model_builder, &node, &input_indices, &operand_indices]( + const char* name, const Shape& shape, const std::vector& param_raw_data) { + std::vector param_data; + param_data.reserve(param_raw_data.size()); + std::transform(param_raw_data.cbegin(), param_raw_data.cend(), + std::back_inserter(param_data), + [](int64_t i) { return SafeInt(i); }); + std::string param_name = model_builder.GetUniqueName(node.Name() + name); + OperandType param_operand_type(Type::TENSOR_INT32, shape); + ORT_RETURN_IF_ERROR( + model_builder.AddOperandFromPersistMemoryBuffer(param_name, param_data.data(), param_operand_type)); + input_indices.push_back(operand_indices.at(param_name)); + return Status::OK(); + }; + + ORT_RETURN_IF_ERROR(AddOperand("starts", param_dimen, compute_metadata.starts_)); //nnapi_begin + + // NNAPI has 2 slice operations + // - ANEURALNETWORKS_SLICE + // Simpler and faster version of slice without steps, available from ANEURALNETWORKS_FEATURE_LEVEL_3 + // Use this one if no step other than 1 is used in ONNX slice + // - ANEURALNETWORKS_STRIDED_SLICE + // More comprehensive version, available from ANEURALNETWORKS_FEATURE_LEVEL_2 + int op_code = ANEURALNETWORKS_STRIDED_SLICE; + if (std::all_of(compute_metadata.steps_.cbegin(), + compute_metadata.steps_.cend(), + [](int64_t i) { return i == 1; }) && + model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { + op_code = ANEURALNETWORKS_SLICE; + // the nnapi size of the slice in this case is the output shape + ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_)); //nnapi_sizes + } else { + // ** The special treatment of ends ** + // The nnapi_end need some special handling, based on the current undocumented design of + // ANEURALNETWORKS_STRIDED_SLICE + // For ORT, for a single axis, after SliceOp::PrepareForCompute, and the step is negative, + // and the last element for slice is at the beginning of the axis (we are slicing backwards) + // The end for this axis will be -1 + // For NNAPI, it is not documented that end can be negative, + // see https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a89695302f8b1e7ae7ce8f4d8c0b8a752 + // However, the actual NNAPI StridedSlice has some odd implementations, + // See https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/operations/StridedSlice.cpp#177 + // and, https://android.googlesource.com/platform/frameworks/ml/+/5b525d4d9100819d87447bd2c2a0bcfdd62899ee/nn/common/include/OperationsUtils.h#262 + // If a negative end is no less than -dim (dimension of the axis), it will be treated as an index counting from + // the end, for example, dim = 5, and end = -1, the end will be normalized to 4, which will cause + // incorrect result, so here we have to make the end = -dim - 1 such that it will not be treated as + // an index counting from the end. + std::vector ends = compute_metadata.ends_; + for (size_t i = 0; i < ends.size(); ++i) { + if (ends[i] == -1) { + ends[i] = -static_cast(input_shape[i] + 1); + } + } + ORT_RETURN_IF_ERROR(AddOperand("ends", param_dimen, ends)); //nnapi_end + ORT_RETURN_IF_ERROR(AddOperand("steps", param_dimen, compute_metadata.steps_)); //nnapi_strides + // We do not use the following inputs in ANEURALNETWORKS_STRIDED_SLICE, set them all to 0 + ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // begin_mask + ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // end_mask + ADD_SCALAR_OPERAND(model_builder, input_indices, 0); // shrink_axis_mask + } + return model_builder.AddOperation(op_code, input_indices, {output}, {output_operand_type}, {output_is_nhwc}); +} + +#pragma endregion + #pragma region CreateGetOpBuilders // The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used @@ -2579,24 +2752,39 @@ Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No static OpBuilderRegistrations CreateOpBuilderRegistrations() { OpBuilderRegistrations op_registrations; + // Builders handle a single op + NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Slice", SliceOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder); + NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder); + + // Builders shared among similar ops { NNAPI_EP_ADD_SHARED_OP_BUILDER("Add", BinaryOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Div", BinaryOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Pow", BinaryOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder); } - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Relu", ReluOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Transpose", TransposeOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Reshape", ReshapeOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("BatchNormalization", BatchNormalizationOpBuilder); - { + NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalAveragePool", PoolOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("GlobalMaxPool", PoolOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("AveragePool", PoolOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("MaxPool", PoolOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAveragePool", PoolOpBuilder); } @@ -2606,10 +2794,6 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() { NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearConv", ConvOpBuilder); } - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Cast", CastOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Softmax", SoftMaxOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Identity", IdentityOpBuilder); - { NNAPI_EP_ADD_SHARED_OP_BUILDER("Gemm", GemmOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("MatMul", GemmOpBuilder); @@ -2621,30 +2805,19 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() { NNAPI_EP_ADD_SHARED_OP_BUILDER("Exp", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Floor", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Log", UnaryOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Neg", UnaryOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("Sigmoid", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Sin", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Sqrt", UnaryOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Tanh", UnaryOpBuilder); - NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder); } - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Squeeze", SqueezeOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("LRN", LRNOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Clip", ClipOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Resize", ResizeOpBuilder); - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Flatten", FlattenOpBuilder); - { - NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder); NNAPI_EP_ADD_SHARED_OP_BUILDER("Max", MinMaxOpBuilder); + NNAPI_EP_ADD_SHARED_OP_BUILDER("Min", MinMaxOpBuilder); } - NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder); - return op_registrations; } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index eecb5ad0f8..dd8f7b68e3 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -76,7 +76,8 @@ class BaseOpSupportChecker : public IOpSupportChecker { return true; } - virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const { + virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const { // ANEURALNETWORKS_FEATURE_LEVEL_1 is the baseline version of NNAPI, // There is no NNAPI support for Android API level 26- return ANEURALNETWORKS_FEATURE_LEVEL_1; @@ -319,7 +320,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_2; } @@ -490,7 +492,8 @@ class PoolOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& params) const override { return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2; } @@ -667,7 +670,8 @@ class ConvOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& params) const override { return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2; } @@ -775,7 +779,8 @@ class CastOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_3; } @@ -805,7 +810,8 @@ class SoftMaxOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_2; } }; @@ -1043,7 +1049,8 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& params) const override; + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, + const OpSupportCheckParams& params) const override; bool HasSupportedInputsImpl(const Node& node) const override; @@ -1079,8 +1086,8 @@ bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initia return true; } -int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel( - const Node& node, const OpSupportCheckParams& /* params */) const { +int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, + const OpSupportCheckParams& /* params */) const { const auto& op(node.OpType()); if (op == "Abs" || op == "Exp" || @@ -1216,7 +1223,8 @@ class SqueezeOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_2; } }; @@ -1255,7 +1263,8 @@ class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_3; } }; @@ -1296,7 +1305,8 @@ class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_1; } bool HasSupportedInputsImpl(const Node& node) const override; @@ -1340,7 +1350,8 @@ class LRNOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_2; } }; @@ -1397,7 +1408,8 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker { bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) const override; - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override; + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override; // Resize opset 10- is very different than Resize opset 11+, with many key attributes missing // We only support Resize opset 11+ here @@ -1516,7 +1528,8 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi return true; } -int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& /* params */) const { +int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, + const OpSupportCheckParams& /* params */) const { int32_t input_type; // This should not happen, but if it happens make sure this will require an impossible version @@ -1590,7 +1603,8 @@ class MinMaxOpSupportChecker : public BaseOpSupportChecker { const std::string& op_type, OpSupportCheckerRegistrations& op_registrations); private: - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_3; } @@ -1629,7 +1643,8 @@ bool MinMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in class EluOpSupportChecker : public BaseOpSupportChecker { private: - int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override { + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { return ANEURALNETWORKS_FEATURE_LEVEL_4; } @@ -1639,6 +1654,63 @@ class EluOpSupportChecker : public BaseOpSupportChecker { #pragma endregion +#pragma region op_slice + +class SliceOpSupportChecker : public BaseOpSupportChecker { + private: + int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, + const OpSupportCheckParams& /* params */) const override { + return ANEURALNETWORKS_FEATURE_LEVEL_2; + } + + // We only support slice from opset 10 + int GetMinSupportedOpSet(const Node& /* node */) const override { return 10; } + + bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, + const OpSupportCheckParams& params) const override; +}; + +bool SliceOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, + const OpSupportCheckParams& /* params */) const { + Shape input_shape; + if (!GetShape(*node.InputDefs()[0], input_shape)) + return false; + + if (input_shape.size() > 4) { + LOGS_DEFAULT(VERBOSE) << "Slice only supports 1-4d shape, input is " + << input_shape.size() << "d shape"; + return false; + } + + // TODO, replace with std::find when we switch to c++17 + if (std::any_of(input_shape.cbegin(), input_shape.cend(), [](int32_t i) { return i == 0; })) { + LOGS_DEFAULT(VERBOSE) << "Slice doesn't support dynamic input shape"; + return false; + } + + if (!CheckIsInitializer(initializers, node, 1, "starts")) { + return false; + } + if (!CheckIsInitializer(initializers, node, 2, "ends")) { + return false; + } + const auto& input_defs = node.InputDefs(); + if (input_defs.size() > 3) { + if (!CheckIsInitializer(initializers, node, 3, "axes")) { + return false; + } + if (input_defs.size() > 4) { + if (!CheckIsInitializer(initializers, node, 4, "steps")) { + return false; + } + } + } + + return true; +} + +#pragma endregion + #pragma region CreateGetOpSupportCheckers // The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used @@ -1657,26 +1729,43 @@ class EluOpSupportChecker : public BaseOpSupportChecker { static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() { OpSupportCheckerRegistrations op_registrations; - { - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker); - } + // Support checkers handle a single op + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Slice", SliceOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker); + NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker); + + // Identity is always supported, we use BaseOpSupportChecker as default + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker); // Relu is always supported, we use BaseOpSupportChecker as default NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Relu", BaseOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Transpose", TransposeOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Reshape", ReshapeOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("BatchNormalization", BatchNormalizationOpSupportChecker); + // Support Checkers shared among similar ops + { + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Add", BinaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Div", BinaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker); + } { + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalAveragePool", PoolOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("GlobalMaxPool", PoolOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("AveragePool", PoolOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MaxPool", PoolOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAveragePool", PoolOpSupportChecker); } @@ -1686,12 +1775,6 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() { NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearConv", ConvOpSupportChecker); } - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Cast", CastOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Softmax", SoftMaxOpSupportChecker); - - // Identity is always supported, we use BaseOpSupportChecker as default - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Identity", BaseOpSupportChecker); - { NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Gemm", GemmOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("MatMul", GemmOpSupportChecker); @@ -1703,30 +1786,19 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() { NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Exp", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Floor", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Log", UnaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Neg", UnaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sigmoid", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sin", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sqrt", UnaryOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Tanh", UnaryOpSupportChecker); - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker); } - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Squeeze", SqueezeOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("QuantizeLinear", QuantizeLinearOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("DequantizeLinear", DequantizeLinearOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("LRN", LRNOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Clip", ClipOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Resize", ResizeOpSupportChecker); - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Flatten", FlattenOpSupportChecker); - { - NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker); NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Max", MinMaxOpSupportChecker); + NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker); } - NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker); - return op_registrations; } diff --git a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc index a194095a15..4389508fd3 100644 --- a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc +++ b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc @@ -19,8 +19,18 @@ void RunSliceTest(const std::vector& input_dims, const std::vector& output_dims, const std::vector& output_vals, bool v10_only = false) { - // V1-9 - ORT_UNUSED_PARAMETER(steps); + std::unordered_set excluded_providers; + + if (!v10_only) + excluded_providers = {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}; + else + excluded_providers = {kTensorrtExecutionProvider}; + + // NNAPI EP does not support empty output + if (std::any_of(output_dims.cbegin(), output_dims.cend(), [](int64_t i) { return i == 0; })) { + excluded_providers.insert(kNnapiExecutionProvider); + } + if (!v10_only) { OpTester testv9("Slice", 9); testv9.AddAttribute("starts", starts); @@ -29,20 +39,27 @@ void RunSliceTest(const std::vector& input_dims, testv9.AddAttribute("axes", axes); testv9.AddInput("data", input_dims, input_vals); testv9.AddOutput("output", output_dims, output_vals); - testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO EP: Disabled temporarily + testv9.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); // OpenVINO EP: Disabled temporarily } // V10 - OpTester testv10("Slice", 10); - testv10.AddInput("data", input_dims, input_vals); - testv10.AddInput("starts", {static_cast(starts.size())}, starts); - testv10.AddInput("ends", {static_cast(ends.size())}, ends); - if (axes.size() != 0) - testv10.AddInput("axes", {static_cast(axes.size())}, axes); - if (steps.size() != 0) - testv10.AddInput("steps", {static_cast(steps.size())}, steps); - testv10.AddOutput("output", output_dims, output_vals); - testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); + auto run_test = [&](bool only_data_not_initializer) { + OpTester testv10("Slice", 10); + testv10.AddInput("data", input_dims, input_vals); + testv10.AddInput("starts", {static_cast(starts.size())}, starts, only_data_not_initializer); + testv10.AddInput("ends", {static_cast(ends.size())}, ends, only_data_not_initializer); + if (axes.size() != 0) + testv10.AddInput("axes", {static_cast(axes.size())}, axes, only_data_not_initializer); + if (steps.size() != 0) + testv10.AddInput("steps", {static_cast(steps.size())}, steps, only_data_not_initializer); + testv10.AddOutput("output", output_dims, output_vals); + testv10.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); + }; + + run_test(false); + + // NNAPI EP requires the starts/ends/axes/steps be initializers + run_test(true); } // Slice V1-9 & Slice V10 can both run the following tests