CoreML: ML Program Slice (#21433)

### Description  Add support for Slice ### Motivation and Context  High priority models.
2026-07-07 17:15:29 +00:00 · 2024-07-23 20:21:55 +10:00 · 2024-07-23 20:21:55 +10:00 · 0f1f3b7705
commit 0f1f3b7705
parent dd010edb37
7 changed files with 128 additions and 38 deletions
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@ -164,6 +164,7 @@ void SetTensorTypeInfo(MILSpec::TensorType& tensor_type, MILSpec::DataType data_
 void SetTensorTypeInfo(MILSpec::TensorType& tensor_type, MILSpec::DataType data_type,
                       const ONNX_NAMESPACE::TensorShapeProto* shape, bool convert_scalar = false) {
  tensor_type.set_datatype(data_type);
+
  if (shape) {
    auto rank = shape->dim_size();
    if (convert_scalar && rank == 0) {
@ -313,7 +314,8 @@ void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std:
  (*op.mutable_inputs())[input_name] = std::move(arg);
 }

-void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) {
+void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
+                        std::optional<int32_t> override_element_type) {
  auto& outputs = *op.mutable_outputs();
  auto& output_arg = *outputs.Add();
  output_arg.set_name(output.Name());
@ -321,8 +323,10 @@ void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& outp
  MILSpec::ValueType& value = *output_arg.mutable_type();
  MILSpec::TensorType& tensor_type = *value.mutable_tensortype();

-  SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(output.TypeAsProto()->tensor_type().elem_type()),
-                    output.Shape(), /*convert_scalar*/ true);
+  auto elem_type = override_element_type ? *override_element_type
+                                         : output.TypeAsProto()->tensor_type().elem_type();
+
+  SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(elem_type), output.Shape(), /*convert_scalar*/ true);
 }

 void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@ -134,7 +134,12 @@ void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
 /// </summary>
 /// <param name="op">Operation to update.</param>
 /// <param name="output">NodeArg with details of output to add.</param>
-void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output);
+/// <param name="override_element_type">
+///   Override the element type. Only set to handle cases where we believe the data at runtime will be int32 but
+///   the original ONNX node has type int64.
+/// </param>
+void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
+                        std::optional<int32_t> override_element_type = std::nullopt);

 /// <summary>
 /// Add pad_type and pad values.
--- a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc
@ -4,6 +4,7 @@
 #include "core/optimizer/initializer.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/coreml/shape_utils.h"
@ -28,12 +29,14 @@ class SliceOpBuilder : public BaseOpBuilder {

  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& builder_params,
                         const logging::Logger& logger) const override;
+
+  bool SupportsMLProgram() const override { return true; }
 };

 namespace {
-Status PrepareSliceComputeMetadataFromConstantInitializers(const Node& slice_node,
-                                                           const GraphViewer& graph_viewer,
-                                                           SliceOp::PrepareForComputeMetadata& compute_metadata) {
+Status PrepareSliceComputeMetadata(const Node& slice_node,
+                                   const GraphViewer& graph_viewer,
+                                   SliceOp::PrepareForComputeMetadata& compute_metadata) {
  // TODO largely copied from nnapi::SliceOpBuilder::AddToModelBuilderImpl. put it somewhere where it can be reused?

  const auto input_defs = slice_node.InputDefs();
@ -114,55 +117,113 @@ void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const No

 Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                             const logging::Logger& logger) const {
+  const auto& input_defs = node.InputDefs();
+  const auto& output_defs = node.OutputDefs();
+
  std::vector<int64_t> data_shape;
  ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");
+  auto rank = data_shape.size();

  SliceOp::PrepareForComputeMetadata compute_metadata{data_shape};
-  ORT_RETURN_IF_ERROR(PrepareSliceComputeMetadataFromConstantInitializers(node, model_builder.GetGraphViewer(),
-                                                                          compute_metadata));
+  ORT_RETURN_IF_ERROR(PrepareSliceComputeMetadata(node, model_builder.GetGraphViewer(), compute_metadata));

-  auto layer = model_builder.CreateNNLayer(node);
-  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
-  auto* slice_static = layer->mutable_slicestatic();
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;  // NOLINT
+    // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.tensor_transformation.slice_by_index

-  for (size_t i = 0; i < compute_metadata.starts_.size(); ++i) {
-    const auto step = compute_metadata.steps_[i],
-               start = compute_metadata.starts_[i],
-               end = compute_metadata.ends_[i];
+    const InlinedVector<bool> begin_mask_values(rank, false);
+    InlinedVector<bool> end_mask_values(rank, false);

-    slice_static->add_beginids(start);
-    slice_static->add_beginmasks(false);
-
-    if (step < 0 && end == -1) {
-      // Special case - stepping backwards up to and including the first index in the dimension.
-      // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work,
-      // so use endmasks to specify the rest of the dimension instead.
-      slice_static->add_endids(-1);  // ignored
-      slice_static->add_endmasks(true);
-    } else {
-      slice_static->add_endids(end);
-      slice_static->add_endmasks(false);
+    // Special case - stepping backwards up to and including the first index in the dimension.
+    // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work,
+    // so use endmasks to specify the rest of the dimension instead.
+    for (size_t i = 0; i < rank; ++i) {
+      if (compute_metadata.steps_[i] < 0 && compute_metadata.ends_[i] == -1) {
+        end_mask_values[i] = true;
+      }
    }

-    slice_static->add_strides(step);
+    // Only int32 and float are supported by CoreML slice_by_index.
+    // We convert any int64 model input to int32 when running the CoreML model for the partition.
+    // Any other integer data created at runtime is the output from CoreML operations, and should int32 not int64.
+    // Based on that, we assume that the actual input when running will be int32, so we override the output data
+    // type to reflect this.
+    // If we were to leave it as TensorProto_DataType_INT64 the CoreML model would be invalid.
+    std::optional<int32_t> output_datatype;
+
+    int32_t input_type;
+    ORT_RETURN_IF_NOT(GetType(*node.InputDefs()[0], input_type, logger), "Failed to get input type");
+
+    if (input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
+      output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
+    }
+
+    auto op = model_builder.CreateOperation(node, "slice_by_index");
+
+    auto begin = model_builder.AddConstant(op->type(), "begin", AsSpan(compute_metadata.starts_));
+    auto end = model_builder.AddConstant(op->type(), "end", AsSpan(compute_metadata.ends_));
+    auto stride = model_builder.AddConstant(op->type(), "stride", AsSpan(compute_metadata.steps_));
+    auto begin_mask = model_builder.AddConstant(op->type(), "begin_mask", AsSpan(begin_mask_values));
+    auto end_mask = model_builder.AddConstant(op->type(), "end_mask", AsSpan(end_mask_values));
+
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    AddOperationInput(*op, "begin", begin);
+    AddOperationInput(*op, "end", end);
+    AddOperationInput(*op, "stride", stride);
+    AddOperationInput(*op, "begin_mask", begin_mask);
+    AddOperationInput(*op, "end_mask", end_mask);
+
+    AddOperationOutput(*op, *output_defs[0], output_datatype);
+
+    model_builder.AddOperation(std::move(op));
+
+  } else  // NOLINT
+#endif    // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    auto layer = model_builder.CreateNNLayer(node);
+    *layer->mutable_input()->Add() = input_defs[0]->Name();
+    *layer->mutable_output()->Add() = output_defs[0]->Name();
+    auto* slice_static = layer->mutable_slicestatic();
+
+    for (size_t i = 0; i < rank; ++i) {
+      const auto step = compute_metadata.steps_[i],
+                 start = compute_metadata.starts_[i],
+                 end = compute_metadata.ends_[i];
+
+      slice_static->add_beginids(start);
+      slice_static->add_beginmasks(false);
+
+      if (step < 0 && end == -1) {
+        // Special case - stepping backwards up to and including the first index in the dimension.
+        // In ONNX Slice, we use end <= -(rank + 1) to represent this. In CoreML, setting endids like that doesn't work,
+        // so use endmasks to specify the rest of the dimension instead.
+        slice_static->add_endids(-1);  // ignored
+        slice_static->add_endmasks(true);
+      } else {
+        slice_static->add_endids(end);
+        slice_static->add_endmasks(false);
+      }
+
+      slice_static->add_strides(step);
+    }
+
+    model_builder.AddLayer(std::move(layer));
  }

-  model_builder.AddLayer(std::move(layer));
  return Status::OK();
 }

 bool SliceOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& /*input_params*/,
                                            const logging::Logger& logger) const {
  int32_t input_type;
-  if (!GetType(*node.InputDefs()[0], input_type, logger))
+  if (!GetType(*node.InputDefs()[0], input_type, logger)) {
    return false;
+  }

  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
      input_type != ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-    LOGS(logger, VERBOSE) << "[" << node.OpType()
-                          << "] Input type: [" << input_type
-                          << "] is not supported for now";
+    LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not supported";
    return false;
  }

@ -197,9 +258,14 @@ bool SliceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPar
  }

  SliceOp::PrepareForComputeMetadata compute_metadata{data_shape};
-  ORT_THROW_IF_ERROR(PrepareSliceComputeMetadataFromConstantInitializers(node, builder_params.graph_viewer,
-                                                                         compute_metadata));
+  auto status = PrepareSliceComputeMetadata(node, builder_params.graph_viewer, compute_metadata);
+  if (status != Status::OK()) {
+    LOGS(logger, VERBOSE) << "PrepareSliceComputeMetadata failed:" << status.ErrorMessage();
+    return false;
+  }
+
  if (!ValidateSliceComputeMetadataForCoreML(compute_metadata, logger)) {
+    // error logged in ValidateSliceComputeMetadataForCoreML
    return false;
  }

--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@ -839,6 +839,13 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
    if (is_input) {
      // the model inputs need to be wired up as args to the 'main' function.
      auto tensor_value_type = CreateNamedTensorValueType(node_arg, /*convert_scalar*/ true);
+
+      // we need to convert int64 to int32 here as well
+      if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
+        tensor_value_type.mutable_type()->mutable_tensortype()->set_datatype(
+            OnnxDataTypeToMILSpec(ONNX_NAMESPACE::TensorProto_DataType_INT32));
+      }
+
      tensor_value_type.set_name(name);

      mlprogram_main_fn_->mutable_inputs()->Add(std::move(tensor_value_type));
--- a/onnxruntime/core/providers/coreml/builders/model_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.h
@ -121,6 +121,13 @@ class ModelBuilder {
    return AddConstant(op_type, value_type, AsSpan(value), shape);
  }

+  // helper to convert a span of non-const data to const
+  template <typename T>
+  std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span<T> value,
+                               std::optional<gsl::span<const int64_t>> shape = std::nullopt) {
+    return AddConstant(op_type, value_type, gsl::span<const T>(value), shape);
+  }
+
  /// <summary>
  /// Add a scalar value as a 'const' operation. See AddConstant for details.
  /// </summary>
--- a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc
@ -90,7 +90,7 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,

  run_test(false);

-  // NNAPI EP requires the starts/ends/axes/steps be initializers
+  // EPs like NNAPI and CoreML require the starts/ends/axes/steps be initializers
  run_test(true);
 }

--- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
+++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
@ -18,6 +18,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:Relu||
 |ai.onnx:Reshape||
 |ai.onnx:Resize|See [resize_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc) implementation. There are too many permutations to describe the valid combinations.|
+|ai.onnx.Slice|starts/ends/axes/steps must be constant initializers.|
 |ai.onnx:Sub||
 |ai.onnx:Sigmoid||
 |ai:onnx:Tanh||