CoreML: Add ML Program ConvTranspose (#21416)

### Description
<!-- Describe your changes. -->
Add ML Program ConvTranspose
- some limitations to simplify the implementation for now
- some limitations due to flaky CoreML output

Added support for non-contiguous MLMultiArray output as we see that with
some unit tests when the CPU-only flag is not set (e.g. innermost dim
has min size of 16 but test output only has 8 values).
- support only one non-contiguous dim to keep it simple
- manually tested as we don't have a setup that can test objective-c
code
- test code is in model.mm and can be enabled via ifdef if we need to
validate any future changes



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
Address operator gaps in high priority model.

---------

Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
This commit is contained in:
Scott McKay 2024-07-24 16:08:20 +10:00 committed by GitHub
parent 6794dfd941
commit 2580d935cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 506 additions and 160 deletions

View file

@ -679,7 +679,10 @@ if(onnxruntime_USE_RKNPU)
endif()
if(onnxruntime_USE_COREML)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*.cc)
if(APPLE)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*.mm)
endif()
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml coreml_proto)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml coreml_proto)

View file

@ -0,0 +1,218 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
#include "core/providers/shared/utils/utils.h"
using namespace CoreML::Specification;
namespace onnxruntime {
namespace coreml {
class ConvTransposeOpBuilder : public BaseOpBuilder {
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& logger) const override;
bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */,
const logging::Logger& /* logger */) const override;
bool SupportsMLProgram() const override { return true; }
};
// Creates the ML Program `conv_transpose` operation for `node` and adds it to `model_builder`.
// Does nothing (and returns OK) if the build does not define COREML_ENABLE_MLPROGRAM.
Status ConvTransposeOpBuilder::AddToModelBuilderImpl([[maybe_unused]] ModelBuilder& model_builder,
                                                     [[maybe_unused]] const Node& node,
                                                     const logging::Logger& /*logger*/) const {
#if defined(COREML_ENABLE_MLPROGRAM)
  using namespace CoreML::Specification::MILSpec;  // NOLINT

  const auto& inputs = node.InputDefs();
  const auto& outputs = node.OutputDefs();
  NodeAttrHelper attrs(node);

  // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.conv.conv_transpose
  std::unique_ptr<Operation> conv_transpose_op = model_builder.CreateOperation(node, "conv_transpose");
  const auto& op_type = conv_transpose_op->type();

  // X and W are always present. B is optional.
  AddOperationInput(*conv_transpose_op, "x", inputs[0]->Name());
  AddOperationInput(*conv_transpose_op, "weight", inputs[1]->Name());

  const bool has_bias = inputs.size() > 2;
  if (has_bias) {
    AddOperationInput(*conv_transpose_op, "bias", inputs[2]->Name());
  }

  // IsOpSupportedImpl has already checked that the weight has a valid shape.
  // The first two dims of the weight are not spatial so are excluded from the count.
  const auto num_spatial_dims = inputs[1]->Shape()->dim_size() - 2;

  // The spec says strides/dilations/pads are optional, but in practice they are required for at least the
  // iOS15 target (CoreML5), so everything is always added for simplicity.
  const auto strides = attrs.Get("strides", std::vector<int64_t>(num_spatial_dims, 1));
  const auto dilations = attrs.Get("dilations", std::vector<int64_t>(num_spatial_dims, 1));

  AddOperationInput(*conv_transpose_op, "strides", model_builder.AddConstant(op_type, "strides", strides));
  AddOperationInput(*conv_transpose_op, "dilations", model_builder.AddConstant(op_type, "dilations", dilations));

  // 'groups' is only added when the ONNX 'group' attribute was explicitly provided.
  if (const std::optional<int64_t> groups = attrs.GetInt64("group"); groups.has_value()) {
    AddOperationInput(*conv_transpose_op, "groups",
                      model_builder.AddScalarConstant(op_type, "groups", groups.value()));
  }

  // if we can enable output_shape, this code works. see IsOpSupportedImpl for the reason it's disabled.
  // const auto output_shape = helper.GetInt64s("output_shape");
  // if (output_shape) {
  //   AddOperationInput(*op, "output_shape", model_builder.AddConstant(op_type, "output_shape", *output_shape));
  //   // these are required despite the spec saying otherwise
  //   AddOperationInput(*op, "pad_type", model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid")));
  //   std::vector<int64_t> pads(num_spatial_dims * 2, 0);
  //   AddOperationInput(*op, "pad", model_builder.AddConstant(op_type, "pad", pads));
  // } else {
  //   AddPadTypeAndPads(*op, model_builder, op_type, helper, num_spatial_dims);
  // }
  AddPadTypeAndPads(*conv_transpose_op, model_builder, op_type, attrs, num_spatial_dims);

  AddOperationOutput(*conv_transpose_op, *outputs[0]);

  model_builder.AddOperation(std::move(conv_transpose_op));
#endif  // defined(COREML_ENABLE_MLPROGRAM)

  return Status::OK();
}
// Returns true if `node` can be converted to a CoreML `conv_transpose` operation by this builder.
//
// Requirements checked here:
//   - an ML Program model is being created
//   - the shape of input X is available
//   - weight (and bias, if present) are constant initializers
//   - the weight has at least one spatial dimension
//   - auto_pad is not SAME_UPPER/SAME_LOWER
//   - if auto_pad is NOTSET and 'pads' is provided, all pad values are identical
//   - kernel_shape, if provided, matches the spatial dims of the weight
//   - output_shape is not provided
//   - output_padding, if provided, is all zeros
//
// The reason a node is rejected is logged at VERBOSE level.
bool ConvTransposeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                               const logging::Logger& logger) const {
  if (!input_params.create_mlprogram) {
    LOGS(logger, VERBOSE) << "ConvTranspose: ML Program required";
    return false;
  }

  // ML Program
  // - const weight until CoreML7 (iOS17)
  //   - require constant for now as non-const would be unusual and we rely on the shape of W to be known to validate
  //     the kernel_shape can be used
  // - const bias
  // - const pad
  //   - if auto_pad is same_upper or same_lower the output[i] - (input[i] * strides[i]) must be divisible by 2
  //     as the pads must be equally split as there's no upper/lower option in CoreML
  //     - punting on supporting this for now
  //   - must be symmetric for CoreML to do the right thing
  // - const strides/dilations/groups
  // - output_shape CoreML output is inconsistent so disabled for now
  //
  // NOTE: need to test with/without the COREML_FLAG_USE_CPU_ONLY flag being set to get an idea of how flaky the CoreML
  //       behavior is.
  //       Update /onnxruntime/test/util/default_providers.cc:DefaultCoreMLExecutionProvider to do so

  const auto& input_defs = node.InputDefs();

  std::vector<int64_t> input_shape;
  if (!GetShape(*input_defs[0], input_shape, logger)) {
    // requires the rank at least to be known
    LOGS(logger, VERBOSE) << "ConvTranspose: failed to get input shape";
    return false;
  }

  // for simplicity require weight to be constant
  const auto& weight_arg = *input_defs[1];
  const auto& weight_name = input_defs[1]->Name();
  const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name);
  if (!weight) {
    LOGS(logger, VERBOSE) << "ConvTranspose: weight must be constant";
    return false;
  }

  if (input_defs.size() > 2 && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name())) {
    LOGS(logger, VERBOSE) << "ConvTranspose: bias must be constant";
    return false;
  }

  std::vector<int64_t> weight_shape;
  if (!GetShape(weight_arg, weight_shape, logger)) {
    // impossible as it's a constant initializer
    LOGS(logger, VERBOSE) << "ConvTranspose: failed to get weight shape";
    return false;
  }

  // the first two dims of the weight are not spatial.
  const int64_t num_spatial_dims = narrow<int64_t>(weight_shape.size()) - 2;
  if (num_spatial_dims < 1) {
    // AddToModelBuilderImpl sizes the default strides/dilations using this value so it must be positive.
    LOGS(logger, VERBOSE) << "ConvTranspose: weight must have at least one spatial dimension";
    return false;
  }

  NodeAttrHelper helper(node);

  // Punt on SAME_UPPER/SAME_LOWER for now.
  // We could infer that 'same' -> 'same_upper' based on the CoreML conv spec having 'same' and 'same_lower' but
  // need to validate that assertion.
  // Additionally, if the pads size is equal, there's no difference between same_upper and same_lower.
  // To do that we'd need the 'output_shape' attribute to check against.
  // Can add this handling if/when needed.
  const auto autopad = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
  if (autopad == AutoPadType::SAME_LOWER || autopad == AutoPadType::SAME_UPPER) {
    LOGS(logger, VERBOSE) << "ConvTranspose: support for SAME_LOWER/SAME_UPPER is not implemented yet";
    return false;
  } else if (autopad == AutoPadType::NOTSET) {
    // CoreML output is inconsistent between CPU_ONLY and ALL if the pads aren't all the same value.
    // CPU matches the expected output, but other devices don't seem to (at least on macOS).
    //
    // 'pads' is optional in ONNX and defaults to zero padding when missing, which is trivially uniform,
    // so only an explicitly provided non-empty value needs to be checked.
    const auto onnx_pads = helper.GetInt64s("pads");
    if (onnx_pads && !onnx_pads->empty()) {
      const auto pad_value = onnx_pads->front();
      if (!std::all_of(onnx_pads->begin() + 1, onnx_pads->end(),
                       [pad_value](auto value) { return value == pad_value; })) {
        LOGS(logger, VERBOSE) << "ConvTranspose: all pad values must be the same for CoreML to return "
                                 "consistent results";
        return false;
      }
    }
  }

  // there's no input to specify a kernel shape in CoreML.
  // it's OK if a specified kernel_shape matches kH and kW dims of the weight input.
  const auto kernel_shape = helper.GetInt64s("kernel_shape");
  if (kernel_shape) {
    // check the specified kernel shape matches the weight shape. skip the initial N and C dims in the latter.
    // the size check comes first so std::equal never reads past the end of weight_shape.
    const bool valid = static_cast<int64_t>(kernel_shape->size()) == num_spatial_dims &&
                       std::equal(kernel_shape->begin(), kernel_shape->end(), weight_shape.begin() + 2);

    if (!valid) {
      LOGS(logger, VERBOSE) << "ConvTranspose: kernel_shape attribute does not match the weight shape";
      return false;
    }
  }

  // In theory this can be supported, but running with COREML_FLAG_USE_CPU_ONLY produces output that doesn't match
  // ONNX. Running without that flag produces the expected output. Madness...
  const auto output_shape = helper.GetInt64s("output_shape");
  if (output_shape) {
    LOGS(logger, VERBOSE) << "ConvTranspose: output_shape is not supported as the CoreML output is inconsistent";
    return false;
  }

  // output_padding, if specified, must be the default value of all zeros as there's no equivalent in CoreML.
  const auto output_padding = helper.GetInt64s("output_padding");
  if (output_padding &&
      std::any_of(output_padding->begin(), output_padding->end(), [](auto value) { return value != 0; })) {
    LOGS(logger, VERBOSE) << "ConvTranspose: output_padding is not supported";
    return false;
  }

  return true;
}
// Creates a ConvTransposeOpBuilder, stores it in `op_registrations.builders`, and maps `op_type` to it
// in `op_registrations.op_builder_map`.
void CreateConvTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
  auto& builders = op_registrations.builders;
  builders.push_back(std::make_unique<ConvTransposeOpBuilder>());

  // the map refers to the instance that was just appended to `builders`.
  const auto& registered_builder = builders.back();
  op_registrations.op_builder_map.emplace(op_type, registered_builder.get());
}
} // namespace coreml
} // namespace onnxruntime

View file

@ -427,13 +427,13 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa
auto h_in = input_shape[input_rank - 2];
auto w_in = input_shape[input_rank - 1];
if (!utils::IsScalingByAFactorOfN(h_in, scale_h)) {
if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h)) {
LOGS(logger, VERBOSE) << "Resize: downsampling scale " << scale_h
<< " is not a factor of input height: " << h_in;
return false;
}
if (!utils::IsScalingByAFactorOfN(w_in, scale_w)) {
if (!utils::ReciprocalIsAFactorOfN(w_in, scale_w)) {
LOGS(logger, VERBOSE) << "Resize: downsampling scale " << scale_w
<< " is not a factor of input width: " << w_in;
return false;

View file

@ -15,120 +15,56 @@ namespace coreml {
static OpBuilderRegistrations CreateOpBuilderRegistrations() {
OpBuilderRegistrations op_registrations;
{ // Add/Mul/Pow/Sub/Div
CreateBinaryOpBuilder("Add", op_registrations);
CreateBinaryOpBuilder("Mul", op_registrations);
CreateBinaryOpBuilder("Pow", op_registrations);
CreateBinaryOpBuilder("Sub", op_registrations);
CreateBinaryOpBuilder("Div", op_registrations);
}
// Unary ops
CreateUnaryOpBuilder("Sqrt", op_registrations);
CreateUnaryOpBuilder("Reciprocal", op_registrations);
{ // Activations
CreateActivationOpBuilder("Sigmoid", op_registrations);
CreateActivationOpBuilder("Tanh", op_registrations);
CreateActivationOpBuilder("Relu", op_registrations);
CreateActivationOpBuilder("PRelu", op_registrations);
CreateActivationOpBuilder("LeakyRelu", op_registrations);
}
// Binary elementwise ops
CreateBinaryOpBuilder("Add", op_registrations);
CreateBinaryOpBuilder("Mul", op_registrations);
CreateBinaryOpBuilder("Pow", op_registrations);
CreateBinaryOpBuilder("Sub", op_registrations);
CreateBinaryOpBuilder("Div", op_registrations);
{ // Transpose
CreateTransposeOpBuilder("Transpose", op_registrations);
}
// Activations
CreateActivationOpBuilder("Sigmoid", op_registrations);
CreateActivationOpBuilder("Tanh", op_registrations);
CreateActivationOpBuilder("Relu", op_registrations);
CreateActivationOpBuilder("PRelu", op_registrations);
CreateActivationOpBuilder("LeakyRelu", op_registrations);
{ // Conv
CreateConvOpBuilder("Conv", op_registrations);
}
// Pooling ops
CreatePoolOpBuilder("GlobalAveragePool", op_registrations);
CreatePoolOpBuilder("GlobalMaxPool", op_registrations);
CreatePoolOpBuilder("AveragePool", op_registrations);
CreatePoolOpBuilder("MaxPool", op_registrations);
{ // Batch Normalization
CreateBatchNormalizationOpBuilder("BatchNormalization", op_registrations);
}
// Reduction ops
CreateReductionOpBuilder("ReduceMean", op_registrations);
CreateReductionOpBuilder("ReduceSum", op_registrations);
{ // Reshape
CreateReshapeOpBuilder("Reshape", op_registrations);
}
{ // DepthToSpace
CreateDepthToSpaceOpBuilder("DepthToSpace", op_registrations);
}
{ // Pool
CreatePoolOpBuilder("GlobalAveragePool", op_registrations);
CreatePoolOpBuilder("GlobalMaxPool", op_registrations);
CreatePoolOpBuilder("AveragePool", op_registrations);
CreatePoolOpBuilder("MaxPool", op_registrations);
}
{ // Concat
CreateConcatOpBuilder("Concat", op_registrations);
}
{ // Resize
CreateResizeOpBuilder("Resize", op_registrations);
}
{ // Gemm/MatMul
CreateGemmOpBuilder("Gemm", op_registrations);
CreateGemmOpBuilder("MatMul", op_registrations);
}
{ // Clip
CreateClipOpBuilder("Clip", op_registrations);
}
{ // Squeeze
CreateSqueezeOpBuilder("Squeeze", op_registrations);
}
{ // ArgMax
CreateArgMaxOpBuilder("ArgMax", op_registrations);
}
{ // Cast
CreateCastOpBuilder("Cast", op_registrations);
}
{ // Flatten
CreateFlattenOpBuilder("Flatten", op_registrations);
}
{ // LRN
CreateLRNOpBuilder("LRN", op_registrations);
}
{ // Pad
CreatePadOpBuilder("Pad", op_registrations);
}
{ // Unary
CreateUnaryOpBuilder("Sqrt", op_registrations);
CreateUnaryOpBuilder("Reciprocal", op_registrations);
}
{ // Reduction
// ReduceMean is used in layer normalization which seems to be problematic in Python tests.
CreateReductionOpBuilder("ReduceMean", op_registrations);
CreateReductionOpBuilder("ReduceSum", op_registrations);
}
{ // Shape
CreateShapeOpBuilder("Shape", op_registrations);
}
{ // Gather
CreateGatherOpBuilder("Gather", op_registrations);
}
{ // Slice
CreateSliceOpBuilder("Slice", op_registrations);
}
{ // Softmax
CreateSoftmaxOpBuilder("Softmax", op_registrations);
}
{ // Split
CreateSplitOpBuilder("Split", op_registrations);
}
CreateArgMaxOpBuilder("ArgMax", op_registrations);
CreateBatchNormalizationOpBuilder("BatchNormalization", op_registrations);
CreateCastOpBuilder("Cast", op_registrations);
CreateClipOpBuilder("Clip", op_registrations);
CreateConcatOpBuilder("Concat", op_registrations);
CreateConvOpBuilder("Conv", op_registrations);
CreateConvTransposeOpBuilder("ConvTranspose", op_registrations);
CreateDepthToSpaceOpBuilder("DepthToSpace", op_registrations);
CreateFlattenOpBuilder("Flatten", op_registrations);
CreateGatherOpBuilder("Gather", op_registrations);
CreateGemmOpBuilder("Gemm", op_registrations);
CreateLRNOpBuilder("LRN", op_registrations);
CreateGemmOpBuilder("MatMul", op_registrations);
CreatePadOpBuilder("Pad", op_registrations);
CreateReshapeOpBuilder("Reshape", op_registrations);
CreateResizeOpBuilder("Resize", op_registrations);
CreateShapeOpBuilder("Shape", op_registrations);
CreateSliceOpBuilder("Slice", op_registrations);
CreateSplitOpBuilder("Split", op_registrations);
CreateSoftmaxOpBuilder("Softmax", op_registrations);
CreateSqueezeOpBuilder("Squeeze", op_registrations);
CreateTransposeOpBuilder("Transpose", op_registrations);
CreateGridSampleOpBuilder("GridSample", op_registrations);

View file

@ -24,6 +24,7 @@ void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_
void CreateClipOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateConcatOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateConvTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateDepthToSpaceOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateFlattenOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateGatherOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

View file

@ -13,6 +13,10 @@
#include "core/common/status.h"
#include "core/platform/ort_mutex.h"
#if defined(__OBJC__)
@class MLMultiArray;
#endif
namespace onnxruntime {
namespace coreml {
@ -32,6 +36,15 @@ using GetOutputTensorMutableRawDataFn = std::function<void*(const std::string& n
int32_t requested_onnx_tensor_element_type,
gsl::span<const int64_t> static_shape)>;
#if defined(__OBJC__)
// helper function that we unit test.
// Handles an MLMultiArray that is contiguous, or has one non-contiguous dimension.
// The output values can be used to copy the array data to a contiguous buffer.
// Loop num_blocks times, copying block_size elements each time, moving stride elements between copies.
// A contiguous array will have num_blocks == 1, block_size == total_size (i.e. can be copied in a single operation)
Status GetMLMultiArrayCopyInfo(const MLMultiArray* array, int64_t& num_blocks, int64_t& block_size, int64_t& stride);
#endif
class Model {
public:
Model(const std::string& path,

View file

@ -174,51 +174,69 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTens
return Status::OK();
}
// Returns true if the stride of the first dimension equals the number of elements in all the remaining
// dimensions. Reads strides[0], so the array is expected to have at least one dimension.
bool IsArrayContiguous(const MLMultiArray* array) {
  const int64_t first_dim_stride = [array.strides[0] longLongValue];

  // product of the sizes of dims 1..N-1
  const auto* dims = array.shape;
  int64_t elems_per_first_dim_entry = 1;
  for (unsigned long dim = 1; dim < dims.count; ++dim) {
    elems_per_first_dim_entry *= [dims[dim] longLongValue];
  }

  return first_dim_stride == elems_per_first_dim_entry;
}
Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
const MLMultiArray* array_info,
const OnnxTensorInfo* tensor_info,
const std::optional<unsigned long> mlmultiarray_buffer_size) {
const MLMultiArray* array,
const int64_t num_blocks, const int64_t block_size, const int64_t stride,
const OnnxTensorInfo* tensor_info) {
if (mlmultiarray_buffer == nullptr) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
}
const size_t num_elements = array_info.count;
// total including non-contiguous space
int64_t array_total_elements = [array.strides[0] longLongValue] * [array.shape[0] longLongValue];
const int64_t num_elements = array.count;
ORT_RETURN_IF(array_total_elements != num_blocks * stride ||
num_elements != num_blocks * block_size,
"MLMultiArray size does not match the copy info");
const auto onnx_data_type = tensor_info->data_type;
switch (onnx_data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
const auto output_data_byte_size = num_elements * sizeof(float);
ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
"CoreML output buffer size and expected output size differ");
memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
const auto* src_buffer = static_cast<const float*>(mlmultiarray_buffer);
auto* dst_buffer = static_cast<float*>(tensor_buffer);
const auto block_byte_size = block_size * sizeof(float);
for (int64_t idx = 0; idx < num_blocks; ++idx) {
memcpy(dst_buffer, src_buffer, block_byte_size);
src_buffer += stride;
dst_buffer += block_size;
}
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
const auto output_data_byte_size = num_elements * sizeof(int32_t);
ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
"CoreML output buffer size and expected output size differ");
memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
const auto* src_buffer = static_cast<const int32_t*>(mlmultiarray_buffer);
auto* dst_buffer = static_cast<int32_t*>(tensor_buffer);
const auto block_byte_size = block_size * sizeof(int32_t);
for (int64_t idx = 0; idx < num_blocks; ++idx) {
memcpy(dst_buffer, src_buffer, block_byte_size);
src_buffer += stride;
dst_buffer += block_size;
}
break;
}
// For this case, since Coreml Spec only uses int32 for model output while onnx provides
// int64 for model output data type. We are doing a type casting (int32 -> int64) here
// when copying the model to ORT
case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32,
"CoreML output data type is not MLMultiArrayDataTypeInt32");
ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
"CoreML output buffer size and expected output size differ");
const auto model_output_span = gsl::span{static_cast<const int32_t*>(mlmultiarray_buffer), num_elements};
const auto output_span = gsl::span{static_cast<int64_t*>(tensor_buffer), num_elements};
std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
[](int32_t v) { return static_cast<int64_t>(v); });
ORT_RETURN_IF(array.dataType != MLMultiArrayDataTypeInt32,
"CoreML output data type is not MLMultiArrayDataTypeInt32");
const int32_t* src_buffer = static_cast<const int32_t*>(mlmultiarray_buffer);
int64_t* dst_buffer = static_cast<int64_t*>(tensor_buffer);
for (int64_t idx = 0; idx < num_blocks; ++idx) {
auto input_span = gsl::span{src_buffer, static_cast<size_t>(block_size)};
auto output_span = gsl::span{dst_buffer, static_cast<size_t>(block_size)};
std::transform(input_span.begin(), input_span.end(), output_span.begin(),
[](int32_t v) { return static_cast<int64_t>(v); });
src_buffer += stride;
dst_buffer += block_size;
}
break;
}
default:
@ -250,8 +268,7 @@ NS_ASSUME_NONNULL_BEGIN
- (Status)loadModel API_AVAILABLE_COREML3;
- (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
outputs:(const std::unordered_map<std::string, OnnxTensorInfo>&)outputs
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)
get_output_tensor_mutable_raw_data_fn
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn
API_AVAILABLE_COREML3;
@property(nullable) MLModel* model API_AVAILABLE_COREML3;
@ -397,21 +414,27 @@ NS_ASSUME_NONNULL_BEGIN
") do not match");
}
ORT_RETURN_IF_NOT(IsArrayContiguous(data),
"Non-contiguous output MLMultiArray is not currently supported");
// support a non-contiguous array, provided only one dimension is not contiguous
int64_t num_blocks = 0;
int64_t block_size = 0;
int64_t stride = 0;
ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride));
__block Status copy_status;
const auto* tensor_info = &output_tensor_info;
// `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
if (@available(macOS 12.3, iOS 15.4, *)) {
[data getBytesWithHandler:^(const void* bytes, NSInteger size) {
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size);
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}];
} else {
// disable size check as old API does not return buffer length
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt);
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}
if (!copy_status.IsOK())
return copy_status;
ORT_RETURN_IF_ERROR(copy_status);
}
}
}
@ -431,6 +454,49 @@ NS_ASSUME_NONNULL_END
namespace onnxruntime {
namespace coreml {
// Calculates how to copy the data in `array` to a contiguous buffer.
//
// Handles an array that is contiguous, or that has exactly one non-contiguous dimension (a dimension whose
// stride is not equal to the total number of elements, including any empty slots, spanned by its inner
// dimensions).
//
// Outputs (always overwritten, so the caller does not need to initialize them):
//   num_blocks: number of copies required
//   block_size: number of elements to copy each time
//   stride:     number of source elements to move forward between copies
//
// A contiguous array results in num_blocks == 1 and block_size == stride == total number of elements.
// An array with no elements results in all outputs being 0.
//
// Returns a failure Status if the array has no dimensions, or more than one non-contiguous dimension.
Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
                               int64_t& num_blocks, int64_t& block_size, int64_t& stride) {
  // block_size == 0 is used below as the 'no non-contiguous dim seen yet' marker, so reset the outputs here
  // instead of relying on the caller having zero-initialized them.
  num_blocks = 0;
  block_size = 0;
  stride = 0;

  const auto* shape = array.shape;
  const auto rank = shape.count;

  // strides[0] and shape[0] are read below, which would raise an exception for an array with no dimensions.
  ORT_RETURN_IF(rank == 0, "MLMultiArray must have at least one dimension");

  if (array.count == 0) {
    // nothing to copy. return early to avoid a division by zero when calculating num_blocks.
    return Status::OK();
  }

  int64_t array_total_elements = [array.strides[0] longLongValue] * [shape[0] longLongValue];

  int64_t data_elems = 1;   // actual values
  int64_t total_elems = 1;  // elems including empty slots if non-contiguous

  // iterate from the innermost dimension outwards
  for (unsigned long i = 1; i <= rank; i++) {
    int64_t this_stride = [array.strides[rank - i] longLongValue];
    if (this_stride != total_elems) {
      // non-contiguous
      if (block_size != 0) {
        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                               "Multiple non-contiguous dimensions in MLMultiArray are not supported.");
      }

      // everything inside this dimension is contiguous and can be copied as a single block
      block_size = data_elems;
      stride = this_stride;
    }

    const auto elems_this_dim = [shape[rank - i] longLongValue];
    data_elems *= elems_this_dim;
    total_elems = elems_this_dim * this_stride;
  }

  if (block_size == 0) {
    // all data is contiguous
    block_size = data_elems;
    stride = array_total_elements;
    assert(block_size == stride);
  }

  num_blocks = data_elems / block_size;

  ORT_ENFORCE(array_total_elements == total_elems, "Logic error calculating copy info");
  ORT_ENFORCE(stride >= block_size, "Logic error calculating copy info");
  ORT_ENFORCE(stride * num_blocks == total_elems, "Logic error calculating copy info");

  return Status::OK();
}
// Internal Execution class
// This class will bridge Model (c++) with CoreMLExecution (objective c++)
class Execution {

View file

@ -274,8 +274,8 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const GraphViewer& graph_viewer, const N
return false;
}
if (!utils::IsScalingByAFactorOfN(h_in, scale_h) ||
!utils::IsScalingByAFactorOfN(w_in, scale_w)) {
if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h) ||
!utils::ReciprocalIsAFactorOfN(w_in, scale_w)) {
LOGS_DEFAULT(VERBOSE) << "Input size must be evenly divisible by output size when downsampling";
return false;
}

View file

@ -24,7 +24,7 @@ common::Status OutputOptionalWithoutDataHelper(const ONNX_NAMESPACE::TypeProto&
}
#endif
bool IsScalingByAFactorOfN(int64_t n, float scale) {
bool ReciprocalIsAFactorOfN(int64_t n, float scale) {
bool is_factor = false;
if (scale > 0.f && scale < 1.f) {
const double factor = 1.0 / scale;

View file

@ -19,6 +19,6 @@ common::Status OutputOptionalWithoutDataHelper(const ONNX_NAMESPACE::TypeProto&
/// Check if the reciprocal of 'scale' is a factor of 'n'.
/// e.g. a scale of 0.5 is 1/2, the reciprocal is 2, and 2 is a factor of any even number.
/// </summary>
bool IsScalingByAFactorOfN(int64_t n, float scale);
bool ReciprocalIsAFactorOfN(int64_t n, float scale);
} // namespace utils
} // namespace onnxruntime

View file

@ -24,7 +24,7 @@ Status ConvTranspose::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr
const auto rank = orig_shape.NumDimensions();
if (conv_transpose_attrs_.group > 1) {
// Xnnpack [G, Oc, H, W Ic/G]
// Xnnpack [G, Oc, H, W, Ic/G]
// (ref: https://github.com/google/XNNPACK/blob/ecd8311c8fd3d9ab47edbc3df5f2b5de7dabe75f/test/deconvolution-operator-tester.h#L678)
if (rank == 4) {
// split C (dim 0) into {group, C/group}

View file

@ -85,8 +85,8 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
float scale_h = scales[2];
float scale_w = scales[3];
if (!utils::IsScalingByAFactorOfN(h_in, scale_h) ||
!utils::IsScalingByAFactorOfN(w_in, scale_w)) {
if (!utils::ReciprocalIsAFactorOfN(h_in, scale_h) ||
!utils::ReciprocalIsAFactorOfN(w_in, scale_w)) {
break;
}
}

View file

@ -0,0 +1,108 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#import <CoreML/CoreML.h>
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#include "core/providers/coreml/model/model.h"
#include "test/util/include/asserts.h"
namespace onnxruntime {
namespace test {
namespace {
// Calls coreml::GetMLMultiArrayCopyInfo for `array` and checks the result.
// If `expect_valid` is false only a failure status is required and the expected_* values are ignored.
void ValidateGetInfo(MLMultiArray* array,
                     int64_t expected_num_blocks, int64_t expected_block_size, int64_t expected_stride,
                     bool expect_valid) {
  int64_t num_blocks = 0;
  int64_t block_size = 0;
  int64_t stride = 0;

  auto status = coreml::GetMLMultiArrayCopyInfo(array, num_blocks, block_size, stride);

  if (expect_valid) {
    ASSERT_STATUS_OK(status);
    ASSERT_EQ(num_blocks, expected_num_blocks);
    ASSERT_EQ(block_size, expected_block_size);
    ASSERT_EQ(stride, expected_stride);
  } else {
    ASSERT_STATUS_NOT_OK(status);
  }
}
} // namespace
// Checks the copy info calculated for arrays with zero or one non-contiguous dimension, and that an array with
// two non-contiguous dimensions is rejected.
TEST(CoreMLUtils, GetMLMultiArrayReadInfo) {
  // fake pointer. we don't read any data but initWithDataPointer requires a non-null address
  void* data = reinterpret_cast<void*>(0xfeedf00d);

  // creates an int32 MLMultiArray over the fake data with the given shape and strides
  const auto create_array = [data](NSArray<NSNumber*>* shape, NSArray<NSNumber*>* strides) -> MLMultiArray* {
    return [[MLMultiArray alloc] initWithDataPointer:data
                                               shape:shape
                                            dataType:MLMultiArrayDataTypeInt32
                                             strides:strides
                                         deallocator:^(void* /* bytes */) {
                                         }
                                               error:nil];
  };

  // a dim is non-contiguous if the stride is > the total number of elements in its inner dimensions

  // dim -1 with non-contiguous data. 1 element (as it's the inner-most dimension) but the stride is 2.
  {
    auto* array = create_array(@[ @1, @1, @8, @8 ], @[ @128, @128, @16, @2 ]);
    ValidateGetInfo(array, 64, 1, 2, true);
  }

  // dim -2 with non-contiguous data. 8 elements in the inner dimension but the stride is 16.
  {
    auto* array = create_array(@[ @1, @1, @8, @8 ], @[ @128, @128, @16, @1 ]);
    ValidateGetInfo(array, 8, 8, 16, true);
  }

  // dim -3 with non-contiguous data. 16 elements in the inner dimensions but stride is 24.
  {
    auto* array = create_array(@[ @1, @2, @4, @4 ], @[ @48, @24, @4, @1 ]);
    ValidateGetInfo(array, 2, 16, 24, true);
  }

  // two non-contiguous dims (dim -2 and dim -3)
  // dim -2 has 4 elements in the inner dimension and stride of 8
  // dim -3 has 32 elements in the inner dimensions (we need to include the empty elements from the non-contiguous data
  // in dim -2) and stride of 48
  {
    auto* array = create_array(@[ @1, @2, @4, @4 ], @[ @96, @48, @8, @1 ]);
    ValidateGetInfo(array, 0, 0, 0, false);
  }
}
} // namespace test
} // namespace onnxruntime

View file

@ -27,7 +27,7 @@ void TestConvTransposeOpInitializer(const ConvTransposeOpAttributes& attributes,
const vector<vector<int64_t>>& input_shapes,
const std::initializer_list<float>& expected_output,
const vector<int64_t>& expected_output_shape,
bool is_filter_initializer = false,
bool is_weight_and_bias_initializer = false,
OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess,
const std::string& err_str = "",
const std::unordered_set<std::string>& excluded_provider_types = {kTensorrtExecutionProvider}) {
@ -58,10 +58,10 @@ void TestConvTransposeOpInitializer(const ConvTransposeOpAttributes& attributes,
}
ORT_ENFORCE(inputs.size() <= 3, "Our name array is only setup to handle 3 inputs");
const char* szNames[] = {"X", "W", "B"};
bool isInitializers[] = {false, is_filter_initializer, false};
const char* input_names[] = {"X", "W", "B"};
bool is_initializers[] = {false, is_weight_and_bias_initializer, is_weight_and_bias_initializer};
for (size_t i = 0; i < inputs.size(); i++) {
test.AddInput<float>(szNames[i], input_shapes[i], inputs[i], isInitializers[i]);
test.AddInput<float>(input_names[i], input_shapes[i], inputs[i], is_initializers[i]);
}
test.AddOutput<float>("Y", expected_output_shape, expected_output);

View file

@ -7,6 +7,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
|ai.onnx:AveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:Clip||
|ai.onnx:Conv|Only 1D/2D Conv is supported.<br/>Bias if provided must be constant.|
|ai.onnx:ConvTranspose|Weight and bias must be constant.<br/>auto_pad of SAME_UPPER/SAME_LOWER is not supported.<br/>kernel_shape must have default values.<br/>output_shape is not supported.<br/>output_padding must have default values.|
|ai.onnx:Div||
|ai.onnx:Gemm|Input B must be constant.|
|ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|