This commit is contained in:
wejoncy 2023-03-07 12:04:42 +08:00 committed by JiCheng
parent 4ca84ac303
commit 18015f0f55
16 changed files with 65 additions and 60 deletions

View file

@ -45,7 +45,7 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
NodeAttrHelper helper(node_unit);
const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
auto input = node_unit.Inputs()[0].node_arg.Name();
const auto& output = node_unit.Outputs()[0].node_arg.Name();

View file

@ -48,7 +48,7 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node
Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
OpSupportCheckParams params{
model_builder.GetEfficientFeatureLevel(),
model_builder.GetEffectiveFeatureLevel(),
model_builder.UseNCHW(),
};
ORT_RETURN_IF_NOT(IsOpSupported(model_builder.GetInitializerTensors(), node_unit, params),

View file

@ -245,7 +245,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
int32_t fuse_code = model_builder.FindActivation(node_unit);
ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
// 1. NNAPI Grouped Conv does not support dilations

View file

@ -39,7 +39,7 @@ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
auto& shaper(model_builder.GetShaper());
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
NodeAttrHelper helper(node_unit);
const auto& input = node_unit.Inputs()[0].node_arg.Name();

View file

@ -84,7 +84,7 @@ class GemmOpBuilder : public BaseOpBuilder {
// Add operator related
void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
// no initializers to skip for batch matmul
return;
}
@ -125,7 +125,7 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
}
Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
return op_builder_helpers::BuildBatchMatMul(model_builder, node_unit);
}

View file

@ -146,7 +146,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
ADD_SCALAR_OPERAND(model_builder, input_indices, kernel_shape[0]);
ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { // nchw only supported on api 29+
if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { // nchw only supported on api 29+
ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
}

View file

@ -77,7 +77,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
const auto& initializers(model_builder.GetInitializerTensors());
NodeAttrHelper helper(node_unit);
const auto& inputs = node_unit.Inputs();
const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
const auto& output = node_unit.Outputs()[0].node_arg.Name();
auto input = inputs[0].node_arg.Name();

View file

@ -163,7 +163,7 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
if (std::all_of(compute_metadata.steps_.cbegin(),
compute_metadata.steps_.cend(),
[](int64_t i) { return i == 1; }) &&
model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
op_code = ANEURALNETWORKS_SLICE;
// the nnapi size of the slice in this case is the output shape
ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_)); // nnapi_sizes

View file

@ -62,7 +62,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
auto& shaper(model_builder.GetShaper());
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
NodeAttrHelper helper(node_unit);
auto input = node_unit.Inputs()[0].node_arg.Name();

View file

@ -28,11 +28,11 @@ namespace onnxruntime {
namespace nnapi {
ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
const std::vector<DeviceWrapper>& nnapi_target_devices)
gsl::span<const DeviceWrapper> nnapi_target_devices)
: nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique<Model>(nnapi_handle)},
shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices),
nnapi_target_device_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
nnapi_model_->nnapi_target_device_feature_level_ = nnapi_target_device_feature_level_;
nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_;
}
// Scalar operand is copied into the model, no need to persist
@ -367,10 +367,10 @@ Status ModelBuilder::AddNewNNAPIOperand(const OperandType& operand_type, uint32_
index = next_index_++;
if (operand_type.channelQuant) {
if (nnapi_target_device_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
if (nnapi_effective_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Per-channel quantization is only supported on Android API level 29+,",
" system NNAPI feature level: ", nnapi_target_device_feature_level_);
" system NNAPI feature level: ", nnapi_effective_feature_level_);
}
RETURN_STATUS_ON_ERROR(nnapi_.ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
@ -506,7 +506,7 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
"on identifyInputsAndOutputs");
// relax fp32tofp16 is only available on API 28+
if (use_fp16_ && nnapi_target_device_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
if (use_fp16_ && nnapi_effective_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
RETURN_STATUS_ON_ERROR_WITH_NOTE(
nnapi_.ANeuralNetworksModel_relaxComputationFloat32toFloat16(
nnapi_model_->model_, true),

View file

@ -31,7 +31,7 @@ class ModelBuilder {
using Shape = Shaper::Shape;
ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
const std::vector<DeviceWrapper>& nnapi_target_devices);
gsl::span<const DeviceWrapper> nnapi_target_devices);
common::Status Compile(std::unique_ptr<Model>& model);
@ -102,7 +102,7 @@ class ModelBuilder {
// the given node must be in the underlying graph_viewer
const NodeUnit& GetNodeUnit(const Node* node) const;
int32_t GetEfficientFeatureLevel() const { return nnapi_target_device_feature_level_; }
int32_t GetEffectiveFeatureLevel() const { return nnapi_effective_feature_level_; }
private:
const NnApi& nnapi_;
const GraphViewer& graph_viewer_;
@ -144,10 +144,10 @@ class ModelBuilder {
std::unordered_set<std::string> unique_names_;
const std::vector<DeviceWrapper>& nnapi_target_devices_;
gsl::span<const DeviceWrapper> nnapi_target_devices_;
// feature_level, to decide if we can run this node on NNAPI
int32_t nnapi_target_device_feature_level_ = 0;
int32_t nnapi_effective_feature_level_ = 0;
// The number of nnapi operations in this model
size_t num_nnapi_ops_ = 0;
uint32_t next_index_ = 0;

View file

@ -847,9 +847,9 @@ Status AddSqueezeOp(ModelBuilder& model_builder,
const std::string& node_name,
const std::string& input, const std::string& output,
std::vector<int32_t> axes) {
if (model_builder.GetEfficientFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
if (model_builder.GetEffectiveFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
return ORT_MAKE_STATUS(
ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEfficientFeatureLevel());
ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEffectiveFeatureLevel());
}
auto& shaper(model_builder.GetShaper());
@ -1013,7 +1013,7 @@ bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit
// Now the dest node is Gemm/Matmul, we want to make sure it is supported
OpSupportCheckParams params{
model_builder.GetEfficientFeatureLevel(),
model_builder.GetEffectiveFeatureLevel(),
model_builder.UseNCHW(),
};

View file

@ -7,7 +7,7 @@
#include "core/providers/common.h"
#include "core/providers/nnapi/nnapi_builtin/builders/helper.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
#include "nnapi_api_helper.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
#ifdef USENNAPISHAREDMEM
#include <sys/mman.h>
@ -90,7 +90,7 @@ size_t Model::GetMappedOutputIdx(const std::string& name) const {
bool Model::SupportsDynamicOutputShape() const {
// dynamic output shape is only supported on Android API level 29+ (ANEURALNETWORKS_FEATURE_LEVEL_3)
return nnapi_target_device_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
return nnapi_effective_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
}
Status Model::PrepareForExecution(std::unique_ptr<Execution>& execution) {

View file

@ -1,11 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "nnapi_api_helper.h"
#include "core/common/inlined_containers_fwd.h"
#include "core/providers/nnapi/nnapi_builtin/builders/model_builder.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
#include "core/common/logging/logging.h"
#ifdef __ANDROID__
@ -15,23 +14,6 @@
namespace onnxruntime {
namespace nnapi {
/** How feature level works for NNAPI. refer to https://developer.android.com/ndk/reference/group/neural-networks
*
* NNAPI device feature level is closely related to NNAPI runtime feature level
(ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level
(the most advanced NNAPI specification and features that the runtime implements).
An NNAPI device feature level is always less than or equal to the runtime feature level.
*
* On Android devices with API level 30 and older, the Android API level of the Android device
must be used for NNAPI runtime feature discovery.
Enum values in FeatureLevelCode from feature level 1 to 5 have their
corresponding Android API levels listed in their documentation,
and each such enum value equals the corresponding API level.
This allows using the Android API level as the feature level.
This mapping between enum value and Android API level does not exist for
feature levels after NNAPI feature level 5 and API levels after S (31).
*/
static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
int32_t runtime_level = static_cast<int32_t>(nnapi_handle.nnapi_runtime_feature_level);
@ -51,12 +33,12 @@ static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
* @return The max feature level supported by a set of devices.
*
*/
static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_sets) {
static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> devices) {
int32_t target_feature_level = GetNNAPIRuntimeFeatureLevel(nnapi_handle);
int64_t devices_feature_level = -1;
for (const auto &device : device_sets) {
for (const auto &device : devices) {
// We want ops to run on the device with the highest feature level so we can support more ops,
// and we don't care which device runs them.
devices_feature_level = std::max(device.feature_level, devices_feature_level);
@ -75,7 +57,7 @@ static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const st
// get all target devices which satisfy the target_device_option
// we will always put CPU device at the end if cpu is enabled
Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
std::vector<DeviceWrapper>& device_sets) {
InlinedVector<DeviceWrapper>& devices) {
// GetTargetDevices is only supported when NNAPI runtime feature level >= ANEURALNETWORKS_FEATURE_LEVEL_3
if (GetNNAPIRuntimeFeatureLevel(nnapi_handle) < ANEURALNETWORKS_FEATURE_LEVEL_3)
return Status::OK();
@ -110,9 +92,9 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
}
if (device_is_cpu) {
cpu_index = static_cast<int32_t>(device_sets.size());
cpu_index = static_cast<int32_t>(devices.size());
}
device_sets.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
devices.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
}
// put CPU device at the end
@ -120,17 +102,16 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
// and nnapi internally skips the last device if it has already found one.
// 2) we can easily exclude nnapi-reference when not strict excluding CPU.
// 3) we can easily log the details of how ops were assigned to NNAPI devices, which is helpful for debugging.
if (cpu_index != -1 && cpu_index != static_cast<int32_t>(device_sets.size()) - 1) {
std::swap(device_sets[device_sets.size() - 1], device_sets[cpu_index]);
if (cpu_index != -1 && cpu_index != static_cast<int32_t>(devices.size()) - 1) {
std::swap(devices[devices.size() - 1], devices[cpu_index]);
}
return Status::OK();
}
std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets) {
std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices) {
std::string nnapi_target_devices_detail;
for (const auto& device : device_sets) {
for (const auto& device : devices) {
const auto device_detail = MakeString("[Name: [", device.name, "], Type [", device.type, "]], ");
nnapi_target_devices_detail += device_detail + " ,";
}
@ -140,7 +121,7 @@ std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets)
// Get the device set first, then get the max feature level supported by all target devices.
// Returns -1 on failure. It's not necessary to handle the error here, because level=-1 will refuse all ops
int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option) {
std::vector<DeviceWrapper> nnapi_target_devices;
InlinedVector<DeviceWrapper> nnapi_target_devices;
if (auto st = GetTargetDevices(nnapi_handle, target_device_option, nnapi_target_devices); !st.IsOK()) {
LOGS_DEFAULT(WARNING) << "GetTargetDevices failed for :" << st.ErrorMessage();
return -1;
@ -150,7 +131,7 @@ int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_h
// Get the max feature level supported by all target devices. If no devices are specified,
// it will return the runtime feature level.
int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles) {
int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles) {
return GetDeviceFeatureLevelInternal(nnapi_handle, device_handles);
}

View file

@ -34,12 +34,35 @@ enum class TargetDeviceOption : int8_t {
};
const char* const nnapi_cpu = ("nnapi-reference");
int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles);
/** How feature level works for NNAPI. Refer to https://developer.android.com/ndk/reference/group/neural-networks
*
* NNAPI device feature level is closely related to NNAPI runtime feature level
(ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level
(the most advanced NNAPI specification and features that the runtime implements).
An NNAPI device feature level is always less than or equal to the runtime feature level.
*
* On Android devices with API level 30 and older, the Android API level of the Android device
must be used for NNAPI runtime feature discovery.
Enum values in FeatureLevelCode from feature level 1 to 5 have their
corresponding Android API levels listed in their documentation,
and each such enum value equals the corresponding API level.
This allows using the Android API level as the feature level.
This mapping between enum value and Android API level does not exist for
feature levels after NNAPI feature level 5 and API levels after S (31).
*/
int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles);
/**
* Get all target devices (hardware accelerators) that satisfy the given target device option.
*
*/
Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
std::vector<DeviceWrapper>& nnapi_target_devices);
InlinedVector<DeviceWrapper>& nnapi_target_devices);
int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option);
std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets);
std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices);
} // namespace nnapi
} // namespace onnxruntime

View file

@ -3,6 +3,7 @@
#pragma once
#include "core/common/inlined_containers_fwd.h"
#include "core/common/optional.h"
#include "core/framework/execution_provider.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
@ -45,7 +46,7 @@ class NnapiExecutionProvider : public IExecutionProvider {
// nnapi handle for either Android NNAPI or x86 hooker.
const nnapi::NnApi* nnapi_handle_ = nullptr;
std::vector<nnapi::DeviceWrapper> nnapi_target_devices_;
InlinedVector<nnapi::DeviceWrapper> nnapi_target_devices_;
nnapi::TargetDeviceOption target_device_option_;
};
} // namespace onnxruntime