[CoreML EP] Use partitioning utils in CoreMLExecutionProvider::GetCapability(). (#8179)

Use partitioning utils in CoreMLExecutionProvider::GetCapability().
This commit is contained in:
Edward Chen 2021-06-30 09:57:36 -07:00 committed by GitHub
parent 4993680e56
commit 665ecdf9ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 143 deletions

View file

@ -1,6 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/coreml/builders/helper.h"
#include <algorithm>
#include <vector>
#ifdef __APPLE__
@ -8,12 +11,10 @@
#include <TargetConditionals.h>
#endif
#include "helper.h"
#include <core/graph/graph_viewer.h>
#include "core/graph/graph_viewer.h"
#include "core/providers/common.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/model/host_utils.h"
#include "op_builder_factory.h"
namespace onnxruntime {
namespace coreml {
@ -74,45 +75,34 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons
return true;
}
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped, so two implementations of
// GetSupportedNodes appear interleaved line by line and the text is not compilable as shown.
//
// Both implementations return an empty result when util::HasRequiredBaseOS() is false (after logging a
// warning) or when any graph input fails IsInputSupported(), and both log each node's op type, index, name
// and support status at VERBOSE level.
//
// - The variant returning std::vector<std::vector<NodeIndex>> (presumably the version being removed) walks
//   graph_viewer.GetNodesInTopologicalOrder() and collects runs of consecutive supported node indices into
//   groups, closing the current group whenever an unsupported node is encountered and once more at the end.
// - The variant returning std::unordered_set<const Node*> (presumably the version being added) iterates
//   graph_viewer.Nodes() and inserts a pointer to every node for which IsNodeSupported() returns true; it
//   does no grouping itself.
std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer, const logging::Logger& logger) {
std::vector<std::vector<size_t>> supported_node_groups;
std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
const logging::Logger& logger) {
std::unordered_set<const Node*> supported_nodes{};
if (!util::HasRequiredBaseOS()) {
LOGS(logger, WARNING) << "All ops will fallback to CPU EP, because we do not have supported OS";
return supported_node_groups;
return supported_nodes;
}
for (const auto* input : graph_viewer.GetInputs()) {
if (!IsInputSupported(*input, "graph", logger)) {
return supported_node_groups;
}
const auto& graph_inputs = graph_viewer.GetInputs();
if (std::any_of(graph_inputs.begin(), graph_inputs.end(),
[&](const NodeArg* input) { return !IsInputSupported(*input, "graph", logger); })) {
return supported_nodes;
}
std::vector<size_t> supported_node_group;
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
auto node_idx = node_indices[i];
const auto* node(graph_viewer.GetNode(node_idx));
bool supported = IsNodeSupported(*node, graph_viewer, logger);
LOGS(logger, VERBOSE) << "Operator type: [" << node->OpType()
<< "] index: [" << node_idx
<< "] name: [" << node->Name()
for (const auto& node : graph_viewer.Nodes()) {
const bool supported = IsNodeSupported(node, graph_viewer, logger);
LOGS(logger, VERBOSE) << "Operator type: [" << node.OpType()
<< "] index: [" << node.Index()
<< "] name: [" << node.Name()
<< "] supported: [" << supported
<< "]";
if (supported) {
supported_node_group.push_back(node_idx);
} else {
if (!supported_node_group.empty()) {
supported_node_groups.push_back(supported_node_group);
supported_node_group.clear();
}
supported_nodes.insert(&node);
}
}
if (!supported_node_group.empty()) {
supported_node_groups.push_back(supported_node_group);
}
return supported_node_groups;
return supported_nodes;
}
bool HasNeuralEngine(const logging::Logger& logger) {

View file

@ -3,8 +3,8 @@
#pragma once
#include <core/common/status.h>
#include <core/graph/basic_types.h>
#include "core/common/status.h"
#include "core/graph/basic_types.h"
namespace onnxruntime {
@ -24,9 +24,9 @@ bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, c
bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const logging::Logger& logger);
// Get a list of groups of supported nodes, each group represents a subgraph supported by CoreML EP
std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer,
const logging::Logger& logger);
// Gets the set of nodes that are supported by the CoreML EP.
std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
const logging::Logger& logger);
// CoreML runs more efficiently on the Apple Neural Engine.
// This detects whether the current system has an Apple Neural Engine.

View file

@ -1,17 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "coreml_execution_provider.h"
#include "core/providers/coreml/coreml_execution_provider.h"
#include "core/framework/allocatormgr.h"
#include "core/framework/compute_capability.h"
#include "core/graph/graph_viewer.h"
#include "core/providers/partitioning_utils.h"
#include "core/session/onnxruntime_cxx_api.h"
#include "model/model.h"
#include "model/host_utils.h"
#include "builders/helper.h"
#include "builders/model_builder.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/model/host_utils.h"
#include "core/providers/coreml/model/model.h"
namespace onnxruntime {
@ -49,121 +50,33 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
return result;
}
/*
Very basic search for groups of nodes that can be handled by the EP.
This doesn't work perfectly if you have a scenario like the following where A and D could be handled by the EP
but B is between them in the topological sort as you'll get two single node capabilities. However it can also
be advantageous if C and E could be handled by the EP as they would be combined with D even though not connected.
Not sure how often each of these scenarios happens.
A B C
| / |
D E
| |
Would probably be better to walk the edges for each node the EP can handle as they are iterated in topological order,
accumulating nodes (and saving which ones have been taken) until you run out. This would guarantee all
connected nodes that can be handled are grouped together.
*/
const auto& logger = *GetLogger();
bool has_neural_engine = coreml::HasNeuralEngine(logger);
const bool has_neural_engine = coreml::HasNeuralEngine(logger);
if ((coreml_flags_ & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) && !has_neural_engine) {
LOGS(logger, VERBOSE) << "The current system does not have Apple Neural Engine";
return result;
}
const auto node_groups = coreml::GetSupportedNodes(graph_viewer, logger);
const auto supported_nodes = coreml::GetSupportedNodes(graph_viewer, logger);
if (node_groups.empty()) {
return result;
}
const auto& graph_output_list = graph_viewer.GetOutputs();
std::unordered_set<const NodeArg*> graph_outputs(graph_output_list.cbegin(), graph_output_list.cend());
size_t num_of_supported_nodes = 0;
for (const auto& group : node_groups) {
if (group.empty())
continue;
num_of_supported_nodes += group.size();
LOGS(logger, VERBOSE) << "CoreMLExecutionProvider::GetCapability, current supported node group size: "
<< group.size();
std::unordered_set<NodeIndex> node_set;
node_set.reserve(group.size());
for (const auto& index : group) {
node_set.insert(index);
}
std::unique_ptr<IndexedSubGraph> sub_graph = std::make_unique<IndexedSubGraph>();
std::unordered_set<const NodeArg*> node_outputs;
std::unordered_set<const NodeArg*> subgraph_inputs;
std::unordered_set<const NodeArg*> subgraph_outputs;
std::vector<const NodeArg*> ordered_subgraph_inputs;
std::vector<const NodeArg*> ordered_subgraph_outputs;
for (const auto& index : group) {
sub_graph->nodes.push_back(index);
const auto* node = graph_viewer.GetNode(index);
for (const auto* input : node->InputDefs()) {
// if the node input was not produced by this subgraph, add it to the subgraph inputs.
if (node_outputs.count(input) == 0) {
if (subgraph_inputs.count(input) == 0) {
subgraph_inputs.insert(input);
ordered_subgraph_inputs.push_back(input);
}
}
}
const auto& output_defs = node->OutputDefs();
for (const auto* output_def : output_defs) {
node_outputs.insert(output_def);
// if output is overall graph output we need to produce it.
if (graph_outputs.count(output_def) != 0) {
ordered_subgraph_outputs.push_back(output_def);
}
}
// if output connects to a node not in this subgraph we need to produce it
for (auto it = node->OutputEdgesBegin(), end = node->OutputEdgesEnd(); it != end; ++it) {
if (node_set.count(it->GetNode().Index()) == 0) {
const auto* output_def = output_defs[it->GetSrcArgIndex()];
if (subgraph_outputs.count(output_def) == 0) {
subgraph_outputs.insert(output_def);
ordered_subgraph_outputs.push_back(output_def);
}
}
}
}
// Assign inputs and outputs to subgraph's meta_def
const auto gen_metadef_name = [&]() {
uint64_t model_hash;
int metadef_id = GenerateMetaDefId(graph_viewer, model_hash);
auto meta_def = std::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>();
meta_def->name = "COREML_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id);
meta_def->domain = kMSDomain;
meta_def->since_version = 1;
meta_def->status = ONNX_NAMESPACE::EXPERIMENTAL;
return MakeString(COREML, "_", model_hash, "_", metadef_id);
};
for (const auto& input : ordered_subgraph_inputs) {
meta_def->inputs.push_back(input->Name());
}
result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {},
gen_metadef_name, COREML);
for (const auto& output : ordered_subgraph_outputs) {
meta_def->outputs.push_back(output->Name());
}
const auto num_of_partitions = result.size();
const auto num_of_supported_nodes = std::transform_reduce(
result.begin(), result.end(),
size_t{0}, std::plus<>{},
[](const auto& partition) -> size_t {
return partition && partition->sub_graph ? partition->sub_graph->nodes.size() : 0;
});
sub_graph->SetMetaDef(std::move(meta_def));
result.push_back(std::make_unique<ComputeCapability>(std::move(sub_graph)));
}
auto num_of_partitions = result.size();
const auto summary_msg = MakeString(
"CoreMLExecutionProvider::GetCapability,",
" number of partitions supported by CoreML: ", num_of_partitions,