diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc index 7a18679329..d0c615c6ac 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc @@ -53,6 +53,7 @@ Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeU }; ORT_RETURN_IF_NOT(IsOpSupported(model_builder.GetInitializerTensors(), node_unit, params), "Unsupported operator ", node_unit.OpType()); + model_builder.SetDebugTrackNode(node_unit.Index()); ORT_RETURN_IF_ERROR(AddToModelBuilderImpl(model_builder, node_unit)); LOGS_DEFAULT(VERBOSE) << "Operator name: [" << node_unit.Name() << "] type: [" << node_unit.OpType() << "] was added"; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index e8432cfc01..9e8f494eb7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -28,9 +28,10 @@ namespace onnxruntime { namespace nnapi { ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle, - gsl::span nnapi_target_devices) + gsl::span nnapi_target_devices, + TargetDeviceOption target_device_option) : nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique(nnapi_handle)}, - shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices), + shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices), target_device_option_(target_device_option), nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) { nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_; } @@ -475,6 +476,9 @@ Status 
ModelBuilder::AddOperations() { Status ModelBuilder::AddOperation(int op, const InlinedVector& input_indices, const std::vector& output_names, const std::vector& output_types) { +#ifndef NDEBUG + operations_recorder_.emplace_back(track_node_index_, op); +#endif InlinedVector output_indices; for (size_t i = 0; i < output_types.size(); i++) { uint32_t index = 0; @@ -548,47 +552,57 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "The model cannot run using current set of target devices, ", GetDevicesDescription(nnapi_target_devices_)); - - } else { + // Workaround for bugs in Android OS: some ops pass the support check but then fail at compilation. + } + // else // no else after return + if (target_device_option_ != TargetDeviceOption::ALL_DEVICES) { use_create_for_devices = true; } } #ifndef NDEBUG if (nnapi_target_devices_.size() > 1 && nnapi_target_devices_.back().type == ANEURALNETWORKS_DEVICE_CPU) { - auto* supported_ops = supported_ops_holder.get(); + auto supported_ops = gsl::make_span(supported_ops_holder.get(), num_nnapi_ops_); RETURN_STATUS_ON_ERROR_WITH_NOTE( nnapi_.ANeuralNetworksModel_getSupportedOperationsForDevices( nnapi_model_->model_, device_handles.data(), - static_cast(device_handles.size() - 1), supported_ops), + static_cast(device_handles.size() - 1), supported_ops.data()), "on getSupportedOperationsForDevices"); - std::unordered_map optype_support; - const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); - for (size_t idx = 0; idx < node_indices.size(); idx++) { - auto node_idx = node_indices[idx]; - const auto* node(graph_viewer_.GetNode(node_idx)); + ORT_ENFORCE(num_nnapi_ops_==operations_recorder_.size(), "num_nnapi_ops_!=operations_recorder_.size()"); + // Key is OpType + ".nnapi_op_" + recorded op; .first counts ops falling back to CPU, .second counts ops the other devices accept. + std::unordered_map> optype_support_status; + for (size_t idx = 0; idx < operations_recorder_.size(); idx++) { + auto [onnx_node_idx, nnapi_idx] = operations_recorder_[idx]; + const auto* 
node(graph_viewer_.GetNode(onnx_node_idx)); + auto stat_name = node->OpType() + ".nnapi_op_" + std::to_string(nnapi_idx); + if (!supported_ops[idx]) { - optype_support[node->OpType()]++; + optype_support_status[stat_name].first++; } else { - optype_support[node->OpType()]--; + optype_support_status[stat_name].second++; } } size_t total_ops = 0; - std::string fb_op_alloc_detail, nm_op_alloc_detail; + std::string fallback_op_detail, normal_op_detail; - for (const auto& [op, count] : optype_support) { - if (count > 0) { - total_ops += count; - fb_op_alloc_detail += std::to_string(count) + "x " + op + ","; - } else { - nm_op_alloc_detail += std::to_string(-count) + "x " + op + ","; + for (const auto& [op, ops_status] : optype_support_status) { + // Names follow the tally above: .first is the fallback count, .second the accelerated count. + const auto& [fallback_cnt, accelerated_cnt] = ops_status; + // Only fallbacks feed the total, because the log line reports it as the number of ops falling back. + total_ops += fallback_cnt; + if (fallback_cnt > 0) { + fallback_op_detail += MakeString(fallback_cnt, "x ", op, ", "); + } + // Checked independently: a single key can have both fallback and accelerated ops. + if (accelerated_cnt > 0) { + normal_op_detail += MakeString(accelerated_cnt, "x ", op, ", "); } } - LOGS_DEFAULT(VERBOSE) << total_ops << " Ops [" << fb_op_alloc_detail << "] out of " << num_nnapi_ops_ + LOGS_DEFAULT(VERBOSE) << total_ops << " Ops [" << fallback_op_detail << "] out of " << num_nnapi_ops_ << " are falling-back to " << kNnapiCpuDeviceName << ", and [" - << nm_op_alloc_detail << "] are running in accelerators."; + << normal_op_detail << "] are running in accelerators."; } #endif // When calling ANeuralNetworksCompilation_createForDevices, diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 28d858b388..2c3b9cc815 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -6,6 +6,7 @@ #include #include "core/common/inlined_containers.h" +#include "core/common/inlined_containers_fwd.h" #include "core/graph/basic_types.h" 
#include "core/providers/nnapi/nnapi_builtin/model.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h" @@ -31,7 +32,7 @@ class ModelBuilder { using Shape = Shaper::Shape; ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle, - gsl::span nnapi_target_devices); + gsl::span nnapi_target_devices, TargetDeviceOption target_device_option); common::Status Compile(std::unique_ptr& model); @@ -104,6 +105,15 @@ class ModelBuilder { int32_t GetEffectiveFeatureLevel() const { return nnapi_effective_feature_level_; } + // Just for Debugging + void SetDebugTrackNode(const size_t node_index) { +#ifndef NDEBUG + track_node_index_ = node_index; +#else + ORT_UNUSED_PARAMETER(node_index); +#endif + } + private: const NnApi& nnapi_; const GraphViewer& graph_viewer_; @@ -147,12 +157,20 @@ class ModelBuilder { gsl::span nnapi_target_devices_; + const TargetDeviceOption target_device_option_; // feature_level, to decide if we can run this node on NNAPI int32_t nnapi_effective_feature_level_ = 0; // The number of nnapi operations in this model size_t num_nnapi_ops_ = 0; uint32_t next_index_ = 0; +#ifndef NDEBUG + // tracking current node index for debugging + size_t track_node_index_ = 0; + // recording onnx node index and nnapi operation index. + // An onnx node might be decomposed into multiple nnapi operations + InlinedVector> operations_recorder_; +#endif // Convert the onnx model to ANeuralNetworksModel common::Status Prepare(); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc index 8e58ed00c9..75d3abe982 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc @@ -289,7 +289,7 @@ common::Status NnapiExecutionProvider::Compile(const std::vector