Fix CoreML API usage memory leak. (#14738)

- Fix CoreML API usage memory leak by putting CoreML API prediction call in an `@autoreleasepool` block as suggested in #14455 and [here](https://developer.apple.com/forums/thread/692425). Conservatively wrapping all CoreML API usage. - Use MLModelConfiguration.computeUnits instead of deprecated MLPredictionOptions.usesCPUOnly (originally in #11382).
2026-07-16 18:31:27 +00:00 · 2023-02-21 14:08:03 -08:00 · 2023-02-21 14:08:03 -08:00 · 755161100a
commit 755161100a
parent 973aaf110b
1 changed files with 17 additions and 9 deletions
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@ -190,7 +190,9 @@ asm(".linker_option \"-framework\", \"CoreML\"");
  compiled_model_path_ = [compileUrl path];

  MLModelConfiguration* config = [MLModelConfiguration alloc];
-  config.computeUnits = MLComputeUnitsAll;
+  config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
+                            ? MLComputeUnitsCPUOnly
+                            : MLComputeUnitsAll;
  _model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];

  if (error != NULL) {
@ -216,7 +218,6 @@ asm(".linker_option \"-framework\", \"CoreML\"");
  }

  MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
-  options.usesCPUOnly = coreml_flags_ & COREML_FLAG_USE_CPU_ONLY;
  NSError* error = nil;
  id<MLFeatureProvider> output_feature = [_model predictionFromFeatures:input_feature
                                                                options:options
@ -268,7 +269,7 @@ asm(".linker_option \"-framework\", \"CoreML\"");
        break;
      }
      // For this case, since Coreml Spec only uses int32 for model output while onnx provides
-      // int64 for model output data type. We are doing a type casting (int32 -> int64) here 
+      // int64 for model output data type. We are doing a type casting (int32 -> int64) here
      // when copying the model to ORT
      case ONNX_NAMESPACE::TensorProto_DataType_INT64:
        if (model_output_type == MLMultiArrayDataTypeInt32) {
@ -313,9 +314,11 @@ class Execution {
 };

 Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) {
-  execution_ = [[CoreMLExecution alloc] initWithPath:path
-                                              logger:logger
-                                        coreml_flags:coreml_flags];
+  @autoreleasepool {
+    execution_ = [[CoreMLExecution alloc] initWithPath:path
+                                                logger:logger
+                                          coreml_flags:coreml_flags];
+  }
 }

 Status Execution::LoadModel() {
@ -324,7 +327,10 @@ Status Execution::LoadModel() {
  }

  if (HAS_VALID_BASE_OS_VERSION) {
-    auto status = [execution_ loadModel];
+    Status status{};
+    @autoreleasepool {
+      status = [execution_ loadModel];
+    }
    model_loaded = status.IsOK();
    return status;
  }
@ -337,7 +343,9 @@ Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>&
  ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel");

  if (HAS_VALID_BASE_OS_VERSION) {
-    return [execution_ predict:inputs outputs:outputs];
+    @autoreleasepool {
+      return [execution_ predict:inputs outputs:outputs];
+    }
  }

  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ ");
@ -371,4 +379,4 @@ const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const {
 }

 }  // namespace coreml
-}  // namespace onnxruntime
+}  // namespace onnxruntime