[CoreML EP] Use partitioning utils in CoreMLExecutionProvider::GetCapability(). (#8179)

Use partitioning utils in CoreMLExecutionProvider::GetCapability().
2026-07-02 03:55:34 +00:00 · 2021-06-30 09:57:36 -07:00 · 2021-06-30 09:57:36 -07:00 · 665ecdf9ce
commit 665ecdf9ce
parent 4993680e56
3 changed files with 46 additions and 143 deletions
--- a/onnxruntime/core/providers/coreml/builders/helper.cc
+++ b/onnxruntime/core/providers/coreml/builders/helper.cc
@@ -1,6 +1,9 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

+#include "core/providers/coreml/builders/helper.h"
+
+#include <algorithm>
 #include <vector>

 #ifdef __APPLE__
@@ -8,12 +11,10 @@
 #include <TargetConditionals.h>
 #endif

-#include "helper.h"
-#include <core/graph/graph_viewer.h>
-
+#include "core/graph/graph_viewer.h"
 #include "core/providers/common.h"
+#include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/coreml/model/host_utils.h"
-#include "op_builder_factory.h"

 namespace onnxruntime {
 namespace coreml {
@@ -74,45 +75,34 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons
  return true;
 }

-std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer, const logging::Logger& logger) {
-  std::vector<std::vector<size_t>> supported_node_groups;
+std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,  // yields a flat set of supported nodes instead of groups of node indices
+                                                  const logging::Logger& logger) {
+  std::unordered_set<const Node*> supported_nodes{};  // returned empty by both early exits below
+
  if (!util::HasRequiredBaseOS()) {
    LOGS(logger, WARNING) << "All ops will fallback to CPU EP, because we do not have supported OS";
-    return supported_node_groups;
+    return supported_nodes;  // still empty: no node is reported as supported when the base OS check fails
  }

-  for (const auto* input : graph_viewer.GetInputs()) {
-    if (!IsInputSupported(*input, "graph", logger)) {
-      return supported_node_groups;
-    }
+  const auto& graph_inputs = graph_viewer.GetInputs();
+  if (std::any_of(graph_inputs.begin(), graph_inputs.end(),  // one unsupported graph input is enough to reject every node
+                  [&](const NodeArg* input) { return !IsInputSupported(*input, "graph", logger); })) {
+    return supported_nodes;  // still empty
  }

-  std::vector<size_t> supported_node_group;
-  const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
-  for (size_t i = 0; i < node_indices.size(); i++) {
-    auto node_idx = node_indices[i];
-    const auto* node(graph_viewer.GetNode(node_idx));
-    bool supported = IsNodeSupported(*node, graph_viewer, logger);
-    LOGS(logger, VERBOSE) << "Operator type: [" << node->OpType()
-                          << "] index: [" << node_idx
-                          << "] name: [" << node->Name()
+  for (const auto& node : graph_viewer.Nodes()) {  // each node is judged on its own; no grouping of adjacent nodes happens here
+    const bool supported = IsNodeSupported(node, graph_viewer, logger);
+    LOGS(logger, VERBOSE) << "Operator type: [" << node.OpType()
+                          << "] index: [" << node.Index()
+                          << "] name: [" << node.Name()
                          << "] supported: [" << supported
                          << "]";
    if (supported) {
-      supported_node_group.push_back(node_idx);
-    } else {
-      if (!supported_node_group.empty()) {
-        supported_node_groups.push_back(supported_node_group);
-        supported_node_group.clear();
-      }
+      supported_nodes.insert(&node);  // keeps the node's address; presumably valid while the graph behind graph_viewer is alive - TODO confirm
    }
  }

-  if (!supported_node_group.empty()) {
-    supported_node_groups.push_back(supported_node_group);
-  }
-
-  return supported_node_groups;
+  return supported_nodes;  // may be empty; the caller passes this set to the partitioning utility
 }

 bool HasNeuralEngine(const logging::Logger& logger) {
--- a/onnxruntime/core/providers/coreml/builders/helper.h
+++ b/onnxruntime/core/providers/coreml/builders/helper.h
@@ -3,8 +3,8 @@

 #pragma once

-#include <core/common/status.h>
-#include <core/graph/basic_types.h>
+#include "core/common/status.h"
+#include "core/graph/basic_types.h"

 namespace onnxruntime {

@@ -24,9 +24,9 @@ bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, c

 bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const logging::Logger& logger);

-// Get a list of groups of supported nodes, each group represents a subgraph supported by CoreML EP
-std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer,
-                                                      const logging::Logger& logger);
+// Gets the set of nodes that are supported by the CoreML EP.
+std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
+                                                  const logging::Logger& logger);

 // CoreML is more efficient running using Apple Neural Engine
 // This is to detect if the current system has Apple Neural Engine
--- a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc
+++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc
@@ -1,17 +1,18 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

-#include "coreml_execution_provider.h"
+#include "core/providers/coreml/coreml_execution_provider.h"

 #include "core/framework/allocatormgr.h"
 #include "core/framework/compute_capability.h"
 #include "core/graph/graph_viewer.h"
+#include "core/providers/partitioning_utils.h"
 #include "core/session/onnxruntime_cxx_api.h"

-#include "model/model.h"
-#include "model/host_utils.h"
-#include "builders/helper.h"
-#include "builders/model_builder.h"
+#include "core/providers/coreml/builders/helper.h"
+#include "core/providers/coreml/builders/model_builder.h"
+#include "core/providers/coreml/model/host_utils.h"
+#include "core/providers/coreml/model/model.h"

 namespace onnxruntime {

@@ -49,121 +50,33 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
    return result;
  }

-  /*
-  Very basic search for groups of nodes that can be handled by the EP.
-  This doesn't work perfectly if you have a scenario like the following where A and D could be handled by the EP
-  but B is between them in the topological sort as you'll get two single node capabilities. However if can also
-  be advantageous if C and E could be handled by the EP as they would be combined with D even though not connected.
-  Not sure how often each of these scenarios happens.
-
-    A  B  C
-    | /   |
-    D     E
-    |     |
-
-  Would probably be better to walk the edges for each node the EP can handle as they are iterated in topological order,
-  accumulating nodes (and saving which ones have been taken) until you run out. This would guarantee all
-  connected nodes that can be handled are grouped together.
-  */
-
  const auto& logger = *GetLogger();

-  bool has_neural_engine = coreml::HasNeuralEngine(logger);
+  const bool has_neural_engine = coreml::HasNeuralEngine(logger);
  if ((coreml_flags_ & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) && !has_neural_engine) {
    LOGS(logger, VERBOSE) << "The current system does not have Apple Neural Engine";
    return result;
  }

-  const auto node_groups = coreml::GetSupportedNodes(graph_viewer, logger);
+  const auto supported_nodes = coreml::GetSupportedNodes(graph_viewer, logger);

-  if (node_groups.empty()) {
-    return result;
-  }
-
-  const auto& graph_output_list = graph_viewer.GetOutputs();
-  std::unordered_set<const NodeArg*> graph_outputs(graph_output_list.cbegin(), graph_output_list.cend());
-
-  size_t num_of_supported_nodes = 0;
-  for (const auto& group : node_groups) {
-    if (group.empty())
-      continue;
-
-    num_of_supported_nodes += group.size();
-    LOGS(logger, VERBOSE) << "CoreMLExecutionProvider::GetCapability, current supported node group size: "
-                          << group.size();
-
-    std::unordered_set<NodeIndex> node_set;
-    node_set.reserve(group.size());
-    for (const auto& index : group) {
-      node_set.insert(index);
-    }
-
-    std::unique_ptr<IndexedSubGraph> sub_graph = std::make_unique<IndexedSubGraph>();
-
-    std::unordered_set<const NodeArg*> node_outputs;
-    std::unordered_set<const NodeArg*> subgraph_inputs;
-    std::unordered_set<const NodeArg*> subgraph_outputs;
-    std::vector<const NodeArg*> ordered_subgraph_inputs;
-    std::vector<const NodeArg*> ordered_subgraph_outputs;
-
-    for (const auto& index : group) {
-      sub_graph->nodes.push_back(index);
-      const auto* node = graph_viewer.GetNode(index);
-
-      for (const auto* input : node->InputDefs()) {
-        // if the node input was not produced by this subgraph, add it to the subgraph inputs.
-        if (node_outputs.count(input) == 0) {
-          if (subgraph_inputs.count(input) == 0) {
-            subgraph_inputs.insert(input);
-            ordered_subgraph_inputs.push_back(input);
-          }
-        }
-      }
-
-      const auto& output_defs = node->OutputDefs();
-      for (const auto* output_def : output_defs) {
-        node_outputs.insert(output_def);
-        // if output is overall graph output we need to produce it.
-        if (graph_outputs.count(output_def) != 0) {
-          ordered_subgraph_outputs.push_back(output_def);
-        }
-      }
-
-      // if output connects to a node not in this subgraph we need to produce it
-      for (auto it = node->OutputEdgesBegin(), end = node->OutputEdgesEnd(); it != end; ++it) {
-        if (node_set.count(it->GetNode().Index()) == 0) {
-          const auto* output_def = output_defs[it->GetSrcArgIndex()];
-          if (subgraph_outputs.count(output_def) == 0) {
-            subgraph_outputs.insert(output_def);
-            ordered_subgraph_outputs.push_back(output_def);
-          }
-        }
-      }
-    }
-
-    // Assign inputs and outputs to subgraph's meta_def
+  const auto gen_metadef_name = [&]() {
    uint64_t model_hash;
    int metadef_id = GenerateMetaDefId(graph_viewer, model_hash);
-    auto meta_def = std::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>();
-    meta_def->name = "COREML_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id);
-    meta_def->domain = kMSDomain;
-    meta_def->since_version = 1;
-    meta_def->status = ONNX_NAMESPACE::EXPERIMENTAL;
+    return MakeString(COREML, "_", model_hash, "_", metadef_id);
+  };

-    for (const auto& input : ordered_subgraph_inputs) {
-      meta_def->inputs.push_back(input->Name());
-    }
+  result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {},
+                                            gen_metadef_name, COREML);

-    for (const auto& output : ordered_subgraph_outputs) {
-      meta_def->outputs.push_back(output->Name());
-    }
+  const auto num_of_partitions = result.size();
+  const auto num_of_supported_nodes = std::transform_reduce(
+      result.begin(), result.end(),
+      size_t{0}, std::plus<>{},
+      [](const auto& partition) -> size_t {
+        return partition && partition->sub_graph ? partition->sub_graph->nodes.size() : 0;
+      });

-    sub_graph->SetMetaDef(std::move(meta_def));
-
-    result.push_back(std::make_unique<ComputeCapability>(std::move(sub_graph)));
-  }
-
-  auto num_of_partitions = result.size();
  const auto summary_msg = MakeString(
      "CoreMLExecutionProvider::GetCapability,",
      " number of partitions supported by CoreML: ", num_of_partitions,