mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
[CoreML EP] Use partitioning utils in CoreMLExecutionProvider::GetCapability(). (#8179)
Use partitioning utils in CoreMLExecutionProvider::GetCapability().
This commit is contained in:
parent
4993680e56
commit
665ecdf9ce
3 changed files with 46 additions and 143 deletions
|
|
@ -1,6 +1,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/coreml/builders/helper.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
|
@ -8,12 +11,10 @@
|
|||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#include "helper.h"
|
||||
#include <core/graph/graph_viewer.h>
|
||||
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/coreml/builders/op_builder_factory.h"
|
||||
#include "core/providers/coreml/model/host_utils.h"
|
||||
#include "op_builder_factory.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace coreml {
|
||||
|
|
@ -74,45 +75,34 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons
|
|||
return true;
|
||||
}
|
||||
|
||||
std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer, const logging::Logger& logger) {
|
||||
std::vector<std::vector<size_t>> supported_node_groups;
|
||||
std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
|
||||
const logging::Logger& logger) {
|
||||
std::unordered_set<const Node*> supported_nodes{};
|
||||
|
||||
if (!util::HasRequiredBaseOS()) {
|
||||
LOGS(logger, WARNING) << "All ops will fallback to CPU EP, because we do not have supported OS";
|
||||
return supported_node_groups;
|
||||
return supported_nodes;
|
||||
}
|
||||
|
||||
for (const auto* input : graph_viewer.GetInputs()) {
|
||||
if (!IsInputSupported(*input, "graph", logger)) {
|
||||
return supported_node_groups;
|
||||
}
|
||||
const auto& graph_inputs = graph_viewer.GetInputs();
|
||||
if (std::any_of(graph_inputs.begin(), graph_inputs.end(),
|
||||
[&](const NodeArg* input) { return !IsInputSupported(*input, "graph", logger); })) {
|
||||
return supported_nodes;
|
||||
}
|
||||
|
||||
std::vector<size_t> supported_node_group;
|
||||
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
|
||||
for (size_t i = 0; i < node_indices.size(); i++) {
|
||||
auto node_idx = node_indices[i];
|
||||
const auto* node(graph_viewer.GetNode(node_idx));
|
||||
bool supported = IsNodeSupported(*node, graph_viewer, logger);
|
||||
LOGS(logger, VERBOSE) << "Operator type: [" << node->OpType()
|
||||
<< "] index: [" << node_idx
|
||||
<< "] name: [" << node->Name()
|
||||
for (const auto& node : graph_viewer.Nodes()) {
|
||||
const bool supported = IsNodeSupported(node, graph_viewer, logger);
|
||||
LOGS(logger, VERBOSE) << "Operator type: [" << node.OpType()
|
||||
<< "] index: [" << node.Index()
|
||||
<< "] name: [" << node.Name()
|
||||
<< "] supported: [" << supported
|
||||
<< "]";
|
||||
if (supported) {
|
||||
supported_node_group.push_back(node_idx);
|
||||
} else {
|
||||
if (!supported_node_group.empty()) {
|
||||
supported_node_groups.push_back(supported_node_group);
|
||||
supported_node_group.clear();
|
||||
}
|
||||
supported_nodes.insert(&node);
|
||||
}
|
||||
}
|
||||
|
||||
if (!supported_node_group.empty()) {
|
||||
supported_node_groups.push_back(supported_node_group);
|
||||
}
|
||||
|
||||
return supported_node_groups;
|
||||
return supported_nodes;
|
||||
}
|
||||
|
||||
bool HasNeuralEngine(const logging::Logger& logger) {
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <core/common/status.h>
|
||||
#include <core/graph/basic_types.h>
|
||||
#include "core/common/status.h"
|
||||
#include "core/graph/basic_types.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
|
|
@ -24,9 +24,9 @@ bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, c
|
|||
|
||||
bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const logging::Logger& logger);
|
||||
|
||||
// Get a list of groups of supported nodes, each group represents a subgraph supported by CoreML EP
|
||||
std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer,
|
||||
const logging::Logger& logger);
|
||||
// Gets the set of nodes that are supported by the CoreML EP.
|
||||
std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
|
||||
const logging::Logger& logger);
|
||||
|
||||
// CoreML runs more efficiently on the Apple Neural Engine
|
||||
// This detects whether the current system has an Apple Neural Engine
|
||||
|
|
|
|||
|
|
@ -1,17 +1,18 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "coreml_execution_provider.h"
|
||||
#include "core/providers/coreml/coreml_execution_provider.h"
|
||||
|
||||
#include "core/framework/allocatormgr.h"
|
||||
#include "core/framework/compute_capability.h"
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "core/providers/partitioning_utils.h"
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
|
||||
#include "model/model.h"
|
||||
#include "model/host_utils.h"
|
||||
#include "builders/helper.h"
|
||||
#include "builders/model_builder.h"
|
||||
#include "core/providers/coreml/builders/helper.h"
|
||||
#include "core/providers/coreml/builders/model_builder.h"
|
||||
#include "core/providers/coreml/model/host_utils.h"
|
||||
#include "core/providers/coreml/model/model.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
|
|
@ -49,121 +50,33 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
Very basic search for groups of nodes that can be handled by the EP.
|
||||
This doesn't work perfectly if you have a scenario like the following where A and D could be handled by the EP
|
||||
but B is between them in the topological sort as you'll get two single node capabilities. However it can also
|
||||
be advantageous if C and E could be handled by the EP as they would be combined with D even though not connected.
|
||||
Not sure how often each of these scenarios happens.
|
||||
|
||||
A B C
|
||||
| / |
|
||||
D E
|
||||
| |
|
||||
|
||||
Would probably be better to walk the edges for each node the EP can handle as they are iterated in topological order,
|
||||
accumulating nodes (and saving which ones have been taken) until you run out. This would guarantee all
|
||||
connected nodes that can be handled are grouped together.
|
||||
*/
|
||||
|
||||
const auto& logger = *GetLogger();
|
||||
|
||||
bool has_neural_engine = coreml::HasNeuralEngine(logger);
|
||||
const bool has_neural_engine = coreml::HasNeuralEngine(logger);
|
||||
if ((coreml_flags_ & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) && !has_neural_engine) {
|
||||
LOGS(logger, VERBOSE) << "The current system does not have Apple Neural Engine";
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto node_groups = coreml::GetSupportedNodes(graph_viewer, logger);
|
||||
const auto supported_nodes = coreml::GetSupportedNodes(graph_viewer, logger);
|
||||
|
||||
if (node_groups.empty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto& graph_output_list = graph_viewer.GetOutputs();
|
||||
std::unordered_set<const NodeArg*> graph_outputs(graph_output_list.cbegin(), graph_output_list.cend());
|
||||
|
||||
size_t num_of_supported_nodes = 0;
|
||||
for (const auto& group : node_groups) {
|
||||
if (group.empty())
|
||||
continue;
|
||||
|
||||
num_of_supported_nodes += group.size();
|
||||
LOGS(logger, VERBOSE) << "CoreMLExecutionProvider::GetCapability, current supported node group size: "
|
||||
<< group.size();
|
||||
|
||||
std::unordered_set<NodeIndex> node_set;
|
||||
node_set.reserve(group.size());
|
||||
for (const auto& index : group) {
|
||||
node_set.insert(index);
|
||||
}
|
||||
|
||||
std::unique_ptr<IndexedSubGraph> sub_graph = std::make_unique<IndexedSubGraph>();
|
||||
|
||||
std::unordered_set<const NodeArg*> node_outputs;
|
||||
std::unordered_set<const NodeArg*> subgraph_inputs;
|
||||
std::unordered_set<const NodeArg*> subgraph_outputs;
|
||||
std::vector<const NodeArg*> ordered_subgraph_inputs;
|
||||
std::vector<const NodeArg*> ordered_subgraph_outputs;
|
||||
|
||||
for (const auto& index : group) {
|
||||
sub_graph->nodes.push_back(index);
|
||||
const auto* node = graph_viewer.GetNode(index);
|
||||
|
||||
for (const auto* input : node->InputDefs()) {
|
||||
// if the node input was not produced by this subgraph, add it to the subgraph inputs.
|
||||
if (node_outputs.count(input) == 0) {
|
||||
if (subgraph_inputs.count(input) == 0) {
|
||||
subgraph_inputs.insert(input);
|
||||
ordered_subgraph_inputs.push_back(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto& output_defs = node->OutputDefs();
|
||||
for (const auto* output_def : output_defs) {
|
||||
node_outputs.insert(output_def);
|
||||
// if output is overall graph output we need to produce it.
|
||||
if (graph_outputs.count(output_def) != 0) {
|
||||
ordered_subgraph_outputs.push_back(output_def);
|
||||
}
|
||||
}
|
||||
|
||||
// if output connects to a node not in this subgraph we need to produce it
|
||||
for (auto it = node->OutputEdgesBegin(), end = node->OutputEdgesEnd(); it != end; ++it) {
|
||||
if (node_set.count(it->GetNode().Index()) == 0) {
|
||||
const auto* output_def = output_defs[it->GetSrcArgIndex()];
|
||||
if (subgraph_outputs.count(output_def) == 0) {
|
||||
subgraph_outputs.insert(output_def);
|
||||
ordered_subgraph_outputs.push_back(output_def);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assign inputs and outputs to subgraph's meta_def
|
||||
const auto gen_metadef_name = [&]() {
|
||||
uint64_t model_hash;
|
||||
int metadef_id = GenerateMetaDefId(graph_viewer, model_hash);
|
||||
auto meta_def = std::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>();
|
||||
meta_def->name = "COREML_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id);
|
||||
meta_def->domain = kMSDomain;
|
||||
meta_def->since_version = 1;
|
||||
meta_def->status = ONNX_NAMESPACE::EXPERIMENTAL;
|
||||
return MakeString(COREML, "_", model_hash, "_", metadef_id);
|
||||
};
|
||||
|
||||
for (const auto& input : ordered_subgraph_inputs) {
|
||||
meta_def->inputs.push_back(input->Name());
|
||||
}
|
||||
result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {},
|
||||
gen_metadef_name, COREML);
|
||||
|
||||
for (const auto& output : ordered_subgraph_outputs) {
|
||||
meta_def->outputs.push_back(output->Name());
|
||||
}
|
||||
const auto num_of_partitions = result.size();
|
||||
const auto num_of_supported_nodes = std::transform_reduce(
|
||||
result.begin(), result.end(),
|
||||
size_t{0}, std::plus<>{},
|
||||
[](const auto& partition) -> size_t {
|
||||
return partition && partition->sub_graph ? partition->sub_graph->nodes.size() : 0;
|
||||
});
|
||||
|
||||
sub_graph->SetMetaDef(std::move(meta_def));
|
||||
|
||||
result.push_back(std::make_unique<ComputeCapability>(std::move(sub_graph)));
|
||||
}
|
||||
|
||||
auto num_of_partitions = result.size();
|
||||
const auto summary_msg = MakeString(
|
||||
"CoreMLExecutionProvider::GetCapability,",
|
||||
" number of partitions supported by CoreML: ", num_of_partitions,
|
||||
|
|
|
|||
Loading…
Reference in a new issue