[CoreML EP] Add Resize Support (#7015)

* code placeholders

* Add previously missing comments

* [CoreML EP] Add Resize Support
This commit is contained in:
Guoyu Wang 2021-03-17 23:27:41 -07:00 committed by GitHub
parent 514444d820
commit 7c7d6debe6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 328 additions and 27 deletions

View file

@ -137,6 +137,9 @@ bool PoolOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializer
return false;
}
// TODO, add support of the ceil_mode by adjusting the padding
// See https://stackoverflow.com/questions/59906456/in-pytorchs-maxpool2d-is-padding-added-depending-on-ceil-mode
// and https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/mil/frontend/torch/ops.py#L621-L644
if (helper.Get("ceil_mode", 0) == 1) {
LOGS(logger, VERBOSE) << "ceil_mode == 1 is not supported for pooling";
return false;

View file

@ -0,0 +1,273 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/common.h"
#include "core/providers/cpu/tensor/reshape_helper.h"
#include "core/providers/shared/utils/utils.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "base_op_builder.h"
namespace onnxruntime {
namespace coreml {
// Op builder that lowers the ONNX Resize operator to a CoreML Upsample layer.
// Adding the node is handled by AddToModelBuilderImpl; support checking by
// IsOpSupportedImpl (both defined below in this file).
class ResizeOpBuilder : public BaseOpBuilder {
// Add operator related
public:
// Marks the ROI/scales/sizes inputs so they are not registered as model inputs/initializers.
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
private:
// Appends the CoreML Upsample layer for this Resize node to the model being built.
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& logger) const override ORT_MUST_USE_RESULT;
// Operator support related
private:
// Returns true only for the Resize configurations CoreML Upsample can express.
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const logging::Logger& logger) const override;
// Resize opset 10- is very different from Resize opset 11+, with many key attributes missing
// We only support Resize opset 11+ here
int GetMinSupportedOpSet(const Node& /* node */) const override { return 11; }
};
// Helper functions
// Reads the `scales` input (input index 2) of a Resize node out of the
// initializer set. Succeeds only when the input exists and is a 1-D tensor
// holding exactly 4 floats (N, C, H, W scales); `scales` receives the values.
bool GetResizeScales(const InitializedTensorSet& initializers, const Node& node, std::vector<float>& scales) {
  const auto& input_defs = node.InputDefs();
  if (input_defs.size() < 3)
    return false;

  const auto& scales_tensor = *initializers.at(input_defs[2]->Name());
  const bool is_vector_of_4 = scales_tensor.dims_size() == 1 && scales_tensor.dims()[0] == 4;
  if (!is_vector_of_4)
    return false;

  const float* raw_scales = GetTensorFloatData(scales_tensor);
  scales.assign(raw_scales, raw_scales + 4);
  return true;
}
// Reads the `sizes` input (input index 3) of a Resize node out of the
// initializer set. Succeeds only when the input exists and is a 1-D tensor
// holding exactly 4 int64 values (N, C, H, W sizes); `sizes` receives them.
bool GetResizeOutputSizes(const InitializedTensorSet& initializers, const Node& node, std::vector<int64_t>& sizes) {
  const auto& input_defs = node.InputDefs();
  if (input_defs.size() < 4)
    return false;

  const auto& sizes_tensor = *initializers.at(input_defs[3]->Name());
  const bool is_vector_of_4 = sizes_tensor.dims_size() == 1 && sizes_tensor.dims()[0] == 4;
  if (!is_vector_of_4)
    return false;

  const int64_t* raw_sizes = GetTensorInt64Data(sizes_tensor);
  sizes.assign(raw_sizes, raw_sizes + 4);
  return true;
}
// Add operator related
// Excludes Resize's auxiliary inputs from the generated CoreML model:
// ROI is unused, and scales/sizes are folded into the Upsample layer's
// scaling factors, so none of them should surface as initializers or inputs.
void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
  const auto& input_defs = node.InputDefs();

  // Skip a tensor both as an initializer and as a model input.
  const auto skip_input = [&model_builder](const std::string& name) {
    model_builder.AddInitializerToSkip(name);
    model_builder.AddInputToSkip(name);
  };

  // We don't really use ROI here, so add it to skipped list if it's an initializer tensor
  skip_input(input_defs[1]->Name());  // ROI

  // We will still add scales to the skipped list even if sizes are present,
  // since there is no use of it — we will not process it later
  skip_input(input_defs[2]->Name());  // scales

  if (input_defs.size() > 3)
    skip_input(input_defs[3]->Name());  // sizes
}
// Emits a CoreML Upsample layer for this Resize node. The H/W scaling
// factors come either from the `scales` input (3-input form) or from
// `sizes` divided by the input spatial dimensions (4-input form).
Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                              const Node& node,
                                              const logging::Logger& logger) const {
  auto layer = CreateNNLayer(node);
  auto* upsample_params = layer->mutable_upsample();

  // Map the ONNX interpolation mode onto CoreML's. IsOpSupportedImpl has
  // already rejected anything other than "linear" and "nearest".
  NodeAttrHelper helper(node);
  upsample_params->set_mode(helper.Get("mode", "nearest") == "linear"
                                ? COREML_SPEC::UpsampleLayerParams_InterpolationMode_BILINEAR
                                : COREML_SPEC::UpsampleLayerParams_InterpolationMode_NN);

  const auto& input_defs = node.InputDefs();
  const auto& initializers(model_builder.GetInitializerTensors());

  if (input_defs.size() == 3) {  // `scales` input is present
    std::vector<float> scales;
    ORT_RETURN_IF_NOT(GetResizeScales(initializers, node, scales), "Error getting resize scales");
    // Only the H/W scales are used; N/C scales were verified to be 1 in IsOpSupportedImpl,
    // and H/W scales were verified to be whole numbers there as well.
    upsample_params->add_scalingfactor(static_cast<int64_t>(scales[2]));
    upsample_params->add_scalingfactor(static_cast<int64_t>(scales[3]));
  } else {  // `sizes` input is present — input count was validated in IsOpSupportedImpl
    std::vector<int64_t> input_shape;
    ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Error getting input shape");
    std::vector<int64_t> output_sizes;
    ORT_RETURN_IF_NOT(GetResizeOutputSizes(initializers, node, output_sizes), "Error getting resize output_sizes");
    // Exact divisibility of output H/W by input H/W was checked in IsOpSupportedImpl,
    // so this integer division is lossless.
    upsample_params->add_scalingfactor(static_cast<int64_t>(output_sizes[2] / input_shape[2]));
    upsample_params->add_scalingfactor(static_cast<int64_t>(output_sizes[3] / input_shape[3]));
  }

  *layer->mutable_input()->Add() = input_defs[0]->Name();
  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

  model_builder.AddLayer(std::move(layer));
  return Status::OK();
}
// Operator support related
// Determines whether this Resize node can be expressed as a CoreML Upsample layer.
// Supported configurations:
//   - 4-D input only
//   - mode is "linear" or "nearest", exclude_outside == 0
//   - coordinate_transformation_mode == "asymmetric"
//     (and nearest_mode == "floor" when mode == "nearest")
//   - scales/sizes inputs must be initializers, must not resize N/C,
//     and must describe a whole-number upscale of H/W
// Fixes: log-message defects — missing separator fused "…should be 1" with the
// next fragment, plus "chanel"/"mutliple" typos.
bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                                        const logging::Logger& logger) const {
  const auto& input_defs = node.InputDefs();
  std::vector<int64_t> input_shape;
  if (!GetShape(*input_defs[0], input_shape, logger))
    return false;

  const auto input_size = input_shape.size();
  if (input_size != 4) {
    LOGS(logger, VERBOSE) << "Resize only support 4d shape, input is "
                          << input_size << "d shape";
    return false;
  }

  {  // check attributes
    NodeAttrHelper helper(node);
    const auto mode = helper.Get("mode", "nearest");
    bool is_linear_resize = mode == "linear";
    bool is_nearest_resize = mode == "nearest";
    if (!is_linear_resize && !is_nearest_resize) {
      LOGS(logger, VERBOSE) << "Resize unsupported input mode, " << mode;
      return false;
    }

    const auto exclude_outside = helper.Get("exclude_outside", 0);
    if (exclude_outside != 0) {
      LOGS(logger, VERBOSE) << "Resize does not support exclude_outside for now";
      return false;
    }

    const auto coord_trans_mode = helper.Get("coordinate_transformation_mode", "half_pixel");
    bool using_asymmetric = coord_trans_mode == "asymmetric";
    if (is_linear_resize) {
      // TODO, add support of align_corners and half_pixel
      if (!using_asymmetric) {
        LOGS(logger, VERBOSE) << "Resize bilinear, unsupported coord_trans_mode, " << coord_trans_mode;
        return false;
      }
    } else {
      // nearest neighbor resizing
      // For resize using nearest neighbor, we only support coord_trans_mode == "asymmetric" && nearest_mode == "floor"
      if (!using_asymmetric) {
        LOGS(logger, VERBOSE) << "Resize nearest neighbor, unsupported coord_trans_mode, " << coord_trans_mode;
        return false;
      }

      const auto nearest_mode = helper.Get("nearest_mode", "round_prefer_floor");
      if (nearest_mode != "floor") {
        LOGS(logger, VERBOSE) << "Resize nearest neighbor, unsupported nearest_mode, " << nearest_mode;
        return false;
      }
    }
  }

  {  // scales and sizes (if present) must be initializers
    if (input_defs.size() < 3) {
      LOGS(logger, VERBOSE) << "Input scales or sizes of Resize must be known";
      return false;
    }

    // scales
    if (input_defs.size() == 3 && !Contains(initializers, input_defs[2]->Name())) {
      LOGS(logger, VERBOSE) << "Input scales of Resize must be known";
      return false;
    }

    // sizes
    if (input_defs.size() > 3 && !Contains(initializers, input_defs[3]->Name())) {
      LOGS(logger, VERBOSE) << "Input sizes of Resize must be known";
      return false;
    }

    // We want to check if the scales or sizes are not trying to resize on N/C channels here
    if (input_defs.size() == 3) {  // we are using scales
      std::vector<float> scales;
      if (!GetResizeScales(initializers, node, scales))
        return false;

      float scale_n = scales[0];
      float scale_c = scales[1];
      if (scale_n != 1.0f || scale_c != 1.0f) {
        LOGS(logger, VERBOSE) << "Scales of N/C channel should be 1, "
                              << "Resize of N/C channels are not supported"
                              << ", scale_n, " << scale_n << ", scale_c, " << scale_c;
        return false;
      }

      // For now we only support upscale, so the scale_h and scale_w should be an integer >= 1
      // TODO support ResizeBilinear
      float scale_h = scales[2];
      float scale_w = scales[3];

      // Onnx spec requires scale to be a positive float, so we are not checking that here
      if (roundf(scale_h) != scale_h) {
        LOGS(logger, VERBOSE) << "Resize: scale_h: " << scale_h << " is not a whole number";
        return false;
      }

      if (roundf(scale_w) != scale_w) {
        LOGS(logger, VERBOSE) << "Resize: scale_w: " << scale_w << " is not a whole number";
        return false;
      }
    } else {
      // we are using sizes
      std::vector<int64_t> output_sizes;
      if (!GetResizeOutputSizes(initializers, node, output_sizes))
        return false;

      auto output_size_n = output_sizes[0];
      auto output_size_c = output_sizes[1];
      if (output_size_n != input_shape[0] || output_size_c != input_shape[1]) {
        LOGS(logger, VERBOSE) << "Output sizes of N/C channel should match the input sizes, "
                              << "Resize of N/C channels are not supported"
                              << ", input_size_n, " << input_shape[0] << ", output_size_n, " << output_size_n
                              << ". input_size_c, " << input_shape[1] << ", output_size_c, " << output_size_c;
        return false;
      }

      // For now we only support upscale, so the output_size_h and output_size_w should be an integer >= 1
      // TODO support ResizeBilinear
      auto output_size_h = output_sizes[2];
      auto output_size_w = output_sizes[3];
      auto input_size_h = input_shape[2];
      auto input_size_w = input_shape[3];

      // Onnx spec requires output sizes to be a positive integer, so we are not checking that here
      if (output_size_h % input_size_h != 0) {
        LOGS(logger, VERBOSE) << "Resize: output_size_h: " << output_size_h
                              << " is not a multiple of input_size_h: " << input_size_h;
        return false;
      }

      if (output_size_w % input_size_w != 0) {
        LOGS(logger, VERBOSE) << "Resize: output_size_w: " << output_size_w
                              << " is not a multiple of input_size_w: " << input_size_w;
        return false;
      }
    }
  }

  return true;
}
// Registers a ResizeOpBuilder for the given op type (called with "Resize")
// by storing the owning pointer and mapping the op type to the raw pointer.
void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
op_registrations.builders.push_back(onnxruntime::make_unique<ResizeOpBuilder>());
op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
}
} // namespace coreml
} // namespace onnxruntime

View file

@ -90,19 +90,24 @@ Status ModelBuilder::RegisterInitializers() {
}
Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_input) {
const auto& name = node_arg.Name();
const std::string input_output_type = is_input ? "input" : "output";
if (is_input) {
// input should not be an initializer
if (Contains(GetInitializerTensors(), name))
return Status::OK();
// This input will not be used
if (Contains(skipped_inputs_, name))
return Status::OK();
}
auto* model_description = coreml_model_->mutable_description();
auto& input_output = is_input
? *model_description->mutable_input()->Add()
: *model_description->mutable_output()->Add();
const auto& name = node_arg.Name();
const std::string input_output_type = is_input ? "input" : "output";
// input should not be an initializer
if (is_input && Contains(GetInitializerTensors(), name)) {
return Status::OK();
}
input_output.set_name(name);
auto* multi_array = input_output.mutable_type()->mutable_multiarraytype();
std::vector<int64_t> shape;
@ -226,6 +231,10 @@ void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
skipped_initializers_.insert(tensor_name);
}
// Marks a graph input as unused so it will not be added to the CoreML model
// (per the header comment, CoreML rejects model inputs no layer consumes).
void ModelBuilder::AddInputToSkip(const std::string& input_name) {
skipped_inputs_.insert(input_name);
}
std::string ModelBuilder::GetUniqueName(const std::string& base_name) {
std::string unique_name;
do {

View file

@ -32,6 +32,10 @@ class ModelBuilder {
// The initializer will be processed separately, skip it as an initializer
void AddInitializerToSkip(const std::string& tensor_name);
// There are some input which will not be used, add it to a list which will not
// be added to CoreML model, since CoreML does not like input unused
void AddInputToSkip(const std::string& input_name);
std::string GetUniqueName(const std::string& base_name);
private:
@ -44,6 +48,7 @@ class ModelBuilder {
std::unordered_map<std::string, OnnxTensorInfo> input_output_info_;
std::unordered_set<std::string> skipped_initializers_;
std::unordered_set<std::string> skipped_inputs_;
uint32_t name_token_{0};
std::unordered_set<std::string> unique_names_;

View file

@ -52,6 +52,10 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
CreateConcatOpBuilder("Concat", op_registrations);
}
{ // Resize
CreateResizeOpBuilder("Resize", op_registrations);
}
return op_registrations;
}

View file

@ -23,6 +23,7 @@ void CreateTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations
void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateBatchNormalizationOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateReshapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateConcatOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateActivationOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

View file

@ -326,19 +326,6 @@ common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers
return Status::OK();
}
#define GET_TENSOR_DATA(FUNC_NAME, ELEMENT_TYPE, DATA) \
const ELEMENT_TYPE* GetTensor##FUNC_NAME(const ONNX_NAMESPACE::TensorProto& tensor) { \
return tensor.DATA().empty() \
? reinterpret_cast<const ELEMENT_TYPE*>(tensor.raw_data().data()) \
: tensor.DATA().data(); \
}
GET_TENSOR_DATA(FloatData, float, float_data)
GET_TENSOR_DATA(Int32Data, int32_t, int32_data)
GET_TENSOR_DATA(Int64Data, int64_t, int64_data)
#undef GET_TENSOR_DATA
bool GetShape(const NodeArg& node_arg, Shape& shape) {
shape.clear();
const auto* shape_proto = node_arg.Shape();

View file

@ -115,12 +115,6 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node&
common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT;
// Get initializer tensor float/int32/int64 data without unpacking
// TODO, move to ort framework
const float* GetTensorFloatData(const ONNX_NAMESPACE::TensorProto& tensor);
const int32_t* GetTensorInt32Data(const ONNX_NAMESPACE::TensorProto& tensor);
const int64_t* GetTensorInt64Data(const ONNX_NAMESPACE::TensorProto& tensor);
// Get Shape/Type of a NodeArg
bool GetShape(const NodeArg& node_arg, Shape& shape);
bool GetType(const NodeArg& node_arg, int32_t& type);

View file

@ -2346,6 +2346,9 @@ class ResizeOpBuilder : public BaseOpBuilder {
};
void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
// We don't really use ROI here, so add them to skipped list
model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // ROI
// We will still add scales to the skipped list even if sizes are present,
// since there is no use of it — we will not process it later
model_builder.AddInitializerToSkip(node.InputDefs()[2]->Name()); // scales

View file

@ -11,6 +11,22 @@
namespace onnxruntime {
// Generates GetTensor<FUNC_NAME>() accessors returning a typed pointer to an
// initializer tensor's payload: uses the typed field (DATA) when populated,
// otherwise reinterprets raw_data. ORT_ENFORCE rejects tensors whose data is
// stored externally, since neither field would hold the actual bytes then.
// (Comments are kept outside the macro so the line continuations stay intact.)
#define GET_TENSOR_DATA(FUNC_NAME, ELEMENT_TYPE, DATA) \
const ELEMENT_TYPE* GetTensor##FUNC_NAME(const ONNX_NAMESPACE::TensorProto& tensor) { \
bool has_external_data = tensor.has_data_location() && \
tensor.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL; \
ORT_ENFORCE(!has_external_data, "tensor: ", tensor.name(), " has external data"); \
return tensor.DATA().empty() \
? reinterpret_cast<const ELEMENT_TYPE*>(tensor.raw_data().data()) \
: tensor.DATA().data(); \
}
GET_TENSOR_DATA(FloatData, float, float_data)    // GetTensorFloatData
GET_TENSOR_DATA(Int32Data, int32_t, int32_data)  // GetTensorInt32Data
GET_TENSOR_DATA(Int64Data, int64_t, int64_data)  // GetTensorInt64Data
#undef GET_TENSOR_DATA
// Captures a reference to the node's attribute map for later typed lookups.
NodeAttrHelper::NodeAttrHelper(const onnxruntime::Node& node)
: node_attributes_(node.GetAttributes()) {}

View file

@ -13,6 +13,12 @@ namespace onnxruntime {
class Node;
// Get initializer tensor float/int32/int64 data without unpacking
// NOTE!!! This will not work when the initializer has external data
const float* GetTensorFloatData(const ONNX_NAMESPACE::TensorProto& tensor);
const int32_t* GetTensorInt32Data(const ONNX_NAMESPACE::TensorProto& tensor);
const int64_t* GetTensorInt64Data(const ONNX_NAMESPACE::TensorProto& tensor);
/**
* Wrapping onnxruntime::Node for retrieving attribute values
*/