diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 37c4a2d227..cb2f70294c 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -190,7 +190,9 @@ asm(".linker_option \"-framework\", \"CoreML\"");
   compiled_model_path_ = [compileUrl path];
 
   MLModelConfiguration* config = [MLModelConfiguration alloc];
-  config.computeUnits = MLComputeUnitsAll;
+  config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
+                            ? MLComputeUnitsCPUOnly
+                            : MLComputeUnitsAll;
   _model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
 
   if (error != NULL) {
@@ -216,7 +218,6 @@ asm(".linker_option \"-framework\", \"CoreML\"");
   }
 
   MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
-  options.usesCPUOnly = coreml_flags_ & COREML_FLAG_USE_CPU_ONLY;
   NSError* error = nil;
   id<MLFeatureProvider> output_feature = [_model predictionFromFeatures:input_feature
                                                                 options:options
@@ -268,7 +269,7 @@ asm(".linker_option \"-framework\", \"CoreML\"");
         break;
       }
       // For this case, since Coreml Spec only uses int32 for model output while onnx provides
-      // int64 for model output data type. We are doing a type casting (int32 -> int64) here 
+      // int64 for model output data type. We are doing a type casting (int32 -> int64) here
       // when copying the model to ORT
       case ONNX_NAMESPACE::TensorProto_DataType_INT64:
         if (model_output_type == MLMultiArrayDataTypeInt32) {
@@ -313,9 +314,11 @@ class Execution {
 };
 
 Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) {
-  execution_ = [[CoreMLExecution alloc] initWithPath:path
-                                              logger:logger
-                                        coreml_flags:coreml_flags];
+  @autoreleasepool {
+    execution_ = [[CoreMLExecution alloc] initWithPath:path
+                                                logger:logger
+                                          coreml_flags:coreml_flags];
+  }
 }
 
 Status Execution::LoadModel() {
@@ -324,7 +327,10 @@ Status Execution::LoadModel() {
   }
 
   if (HAS_VALID_BASE_OS_VERSION) {
-    auto status = [execution_ loadModel];
+    Status status{};
+    @autoreleasepool {
+      status = [execution_ loadModel];
+    }
     model_loaded = status.IsOK();
     return status;
   }
@@ -337,7 +343,9 @@ Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>&
   ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel");
 
   if (HAS_VALID_BASE_OS_VERSION) {
-    return [execution_ predict:inputs outputs:outputs];
+    @autoreleasepool {
+      return [execution_ predict:inputs outputs:outputs];
+    }
   }
 
   return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ ");
@@ -371,4 +379,4 @@ const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const {
 }
 
 }  // namespace coreml
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime