This commit is contained in:
wejoncy 2023-03-10 13:34:08 +08:00 committed by JiCheng
parent cd3173d531
commit 92fabf57ea
4 changed files with 52 additions and 24 deletions

View file

@ -53,6 +53,7 @@ Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeU
};
ORT_RETURN_IF_NOT(IsOpSupported(model_builder.GetInitializerTensors(), node_unit, params),
"Unsupported operator ", node_unit.OpType());
model_builder.SetDebugTrackNode(node_unit.Index());
ORT_RETURN_IF_ERROR(AddToModelBuilderImpl(model_builder, node_unit));
LOGS_DEFAULT(VERBOSE) << "Operator name: [" << node_unit.Name()
<< "] type: [" << node_unit.OpType() << "] was added";

View file

@ -28,9 +28,10 @@ namespace onnxruntime {
namespace nnapi {
// Constructs a ModelBuilder for the given graph.
//
// - graph_viewer / nnapi_handle are bound to reference members, so both must
//   outlive this builder.
// - nnapi_target_devices is kept as a gsl::span (a non-owning view); the
//   underlying device list must stay alive as well.
// - target_device_option is stored as-is in target_device_option_.
//
// A fresh Model is created from nnapi_handle, and the effective NNAPI feature
// level is computed once from the handle and the target devices, then mirrored
// onto the owned Model so both objects agree on it.
ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
gsl::span<const DeviceWrapper> nnapi_target_devices)
gsl::span<const DeviceWrapper> nnapi_target_devices,
TargetDeviceOption target_device_option)
: nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique<Model>(nnapi_handle)},
shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices),
shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices), target_device_option_(target_device_option),
// Computed from the stored span member rather than the constructor argument.
nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
// Keep the Model's copy of the feature level in sync with the builder's.
nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_;
}
@ -475,6 +476,9 @@ Status ModelBuilder::AddOperations() {
Status ModelBuilder::AddOperation(int op, const InlinedVector<uint32_t>& input_indices,
const std::vector<std::string>& output_names,
const std::vector<OperandType>& output_types) {
#ifndef NDEBUG
operations_recorder_.emplace_back(track_node_index_, op);
#endif
InlinedVector<uint32_t> output_indices;
for (size_t i = 0; i < output_types.size(); i++) {
uint32_t index = 0;
@ -548,47 +552,52 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
"The model cannot run using current set of target devices, ",
GetDevicesDescription(nnapi_target_devices_));
} else {
// Workaround for bugs in Android OS: sometimes ops pass the support check but still fail at compilation.
}
// else // no else after return
if (target_device_option_ != TargetDeviceOption::ALL_DEVICES) {
use_create_for_devices = true;
}
}
#ifndef NDEBUG
if (nnapi_target_devices_.size() > 1 && nnapi_target_devices_.back().type == ANEURALNETWORKS_DEVICE_CPU) {
auto* supported_ops = supported_ops_holder.get();
auto supported_ops = gsl::make_span(supported_ops_holder.get(), num_nnapi_ops_);
RETURN_STATUS_ON_ERROR_WITH_NOTE(
nnapi_.ANeuralNetworksModel_getSupportedOperationsForDevices(
nnapi_model_->model_, device_handles.data(),
static_cast<uint32_t>(device_handles.size() - 1), supported_ops),
static_cast<uint32_t>(device_handles.size() - 1), supported_ops.data()),
"on getSupportedOperationsForDevices");
std::unordered_map<std::string, int32_t> optype_support;
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t idx = 0; idx < node_indices.size(); idx++) {
auto node_idx = node_indices[idx];
const auto* node(graph_viewer_.GetNode(node_idx));
ORT_ENFORCE(num_nnapi_ops_==operations_recorder_.size(), "num_nnapi_ops_!=operations_recorder_.size()");
std::unordered_map<std::string, std::pair<int32_t, int32_t>> optype_support_status;
for (size_t idx = 0; idx < operations_recorder_.size(); idx++) {
auto [onnx_node_idx, nnapi_idx] = operations_recorder_[idx];
const auto* node(graph_viewer_.GetNode(onnx_node_idx));
auto stat_name = node->OpType() + ".nnapi_op_" + std::to_string(nnapi_idx);
if (!supported_ops[idx]) {
optype_support[node->OpType()]++;
optype_support_status[stat_name].first++;
} else {
optype_support[node->OpType()]--;
optype_support_status[stat_name].second++;
}
}
size_t total_ops = 0;
std::string fb_op_alloc_detail, nm_op_alloc_detail;
std::string fallback_op_detail, normal_op_detail;
for (const auto& [op, count] : optype_support) {
if (count > 0) {
total_ops += count;
fb_op_alloc_detail += std::to_string(count) + "x " + op + ",";
} else {
nm_op_alloc_detail += std::to_string(-count) + "x " + op + ",";
for (const auto& [op, ops_status] : optype_support_status) {
auto& [support_cnt, unspport_cnt] = ops_status;
total_ops += support_cnt + unspport_cnt;
if (support_cnt > 0) {
fallback_op_detail += MakeString(support_cnt, "x ", op, ", ");
} else if (unspport_cnt > 0) {
normal_op_detail += MakeString(unspport_cnt, "x ", op, ", ");
}
}
LOGS_DEFAULT(VERBOSE) << total_ops << " Ops [" << fb_op_alloc_detail << "] out of " << num_nnapi_ops_
LOGS_DEFAULT(VERBOSE) << total_ops << " Ops [" << fallback_op_detail << "] out of " << num_nnapi_ops_
<< " are falling-back to " << kNnapiCpuDeviceName << ", and ["
<< nm_op_alloc_detail << "] are running in accelerators.";
<< normal_op_detail << "] are running in accelerators.";
}
#endif
// When calling ANeuralNetworksCompilation_createForDevices,

View file

@ -6,6 +6,7 @@
#include <unordered_set>
#include "core/common/inlined_containers.h"
#include "core/common/inlined_containers_fwd.h"
#include "core/graph/basic_types.h"
#include "core/providers/nnapi/nnapi_builtin/model.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
@ -31,7 +32,7 @@ class ModelBuilder {
using Shape = Shaper::Shape;
ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
gsl::span<const DeviceWrapper> nnapi_target_devices);
gsl::span<const DeviceWrapper> nnapi_target_devices, TargetDeviceOption target_device_option);
common::Status Compile(std::unique_ptr<Model>& model);
@ -104,6 +105,15 @@ class ModelBuilder {
// Returns the effective NNAPI feature level held by this builder.
int32_t GetEffectiveFeatureLevel() const {
  return nnapi_effective_feature_level_;
}
// Debug-only hook: remembers the index of the node currently being processed.
// When NDEBUG is defined the argument is intentionally ignored and nothing is stored.
void SetDebugTrackNode(const size_t node_index) {
#ifdef NDEBUG
  ORT_UNUSED_PARAMETER(node_index);
#else
  track_node_index_ = node_index;
#endif
}
private:
const NnApi& nnapi_;
const GraphViewer& graph_viewer_;
@ -147,12 +157,20 @@ class ModelBuilder {
gsl::span<const DeviceWrapper> nnapi_target_devices_;
const TargetDeviceOption target_device_option_;
// feature_level, to decide if we can run this node on NNAPI
int32_t nnapi_effective_feature_level_ = 0;
// The number of nnapi operations in this model
size_t num_nnapi_ops_ = 0;
uint32_t next_index_ = 0;
#ifndef NDEBUG
// tracking current node index for debugging
size_t track_node_index_ = 0;
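// Records, for every NNAPI operation added via AddOperation, the index of the
// ONNX node being tracked and the NNAPI operation code that was passed in.
// An ONNX node might be decomposed into multiple NNAPI operations.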
InlinedVector<std::pair<size_t, int32_t>> operations_recorder_;
#endif
// Convert the onnx model to ANeuralNetworksModel
common::Status Prepare();

View file

@ -289,7 +289,7 @@ common::Status NnapiExecutionProvider::Compile(const std::vector<FusedNodeAndGra
Node& fused_node = fused_node_and_graph.fused_node;
const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph);
nnapi::ModelBuilder builder(graph_viewer, *nnapi_handle_, nnapi_target_devices_);
nnapi::ModelBuilder builder(graph_viewer, *nnapi_handle_, nnapi_target_devices_, target_device_option_);
builder.SetUseNCHW(nnapi_flags_ & NNAPI_FLAG_USE_NCHW);
builder.SetUseFp16(nnapi_flags_ & NNAPI_FLAG_USE_FP16);