From 755161100a56454463a02c83ce114f2f736fe78b Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Feb 2023 14:08:03 -0800 Subject: [PATCH] Fix CoreML API usage memory leak. (#14738) - Fix CoreML API usage memory leak by putting CoreML API prediction call in an `@autoreleasepool` block as suggested in #14455 and [here](https://developer.apple.com/forums/thread/692425). Conservatively wrapping all CoreML API usage. - Use MLModelConfiguration.computeUnits instead of deprecated MLPredictionOptions.usesCPUOnly (originally in #11382). --- .../core/providers/coreml/model/model.mm | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 37c4a2d227..cb2f70294c 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -190,7 +190,9 @@ asm(".linker_option \"-framework\", \"CoreML\""); compiled_model_path_ = [compileUrl path]; MLModelConfiguration* config = [MLModelConfiguration alloc]; - config.computeUnits = MLComputeUnitsAll; + config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY) + ? MLComputeUnitsCPUOnly + : MLComputeUnitsAll; _model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error]; if (error != NULL) { @@ -216,7 +218,6 @@ asm(".linker_option \"-framework\", \"CoreML\""); } MLPredictionOptions* options = [[MLPredictionOptions alloc] init]; - options.usesCPUOnly = coreml_flags_ & COREML_FLAG_USE_CPU_ONLY; NSError* error = nil; id output_feature = [_model predictionFromFeatures:input_feature options:options @@ -268,7 +269,7 @@ asm(".linker_option \"-framework\", \"CoreML\""); break; } // For this case, since Coreml Spec only uses int32 for model output while onnx provides - // int64 for model output data type. We are doing a type casting (int32 -> int64) here + // int64 for model output data type. We are doing a type casting (int32 -> int64) here // when copying the model to ORT case ONNX_NAMESPACE::TensorProto_DataType_INT64: if (model_output_type == MLMultiArrayDataTypeInt32) { @@ -313,9 +314,11 @@ class Execution { }; Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) { - execution_ = [[CoreMLExecution alloc] initWithPath:path - logger:logger - coreml_flags:coreml_flags]; + @autoreleasepool { + execution_ = [[CoreMLExecution alloc] initWithPath:path + logger:logger + coreml_flags:coreml_flags]; + } } Status Execution::LoadModel() { @@ -324,7 +327,10 @@ Status Execution::LoadModel() { } if (HAS_VALID_BASE_OS_VERSION) { - auto status = [execution_ loadModel]; + Status status{}; + @autoreleasepool { + status = [execution_ loadModel]; + } model_loaded = status.IsOK(); return status; } @@ -337,7 +343,9 @@ Status Execution::Predict(const std::unordered_map& ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel"); if (HAS_VALID_BASE_OS_VERSION) { - return [execution_ predict:inputs outputs:outputs]; + @autoreleasepool { + return [execution_ predict:inputs outputs:outputs]; + } } return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ "); @@ -371,4 +379,4 @@ const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const { } } // namespace coreml -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime