Fix CoreML API usage memory leak. (#14738)

- Fix a memory leak in CoreML API usage by wrapping the CoreML prediction call in an `@autoreleasepool` block, as suggested in #14455 and in [this Apple Developer Forums thread](https://developer.apple.com/forums/thread/692425). To be conservative, all CoreML API usage is wrapped, not just the prediction call.

- Use `MLModelConfiguration.computeUnits` instead of the deprecated `MLPredictionOptions.usesCPUOnly` (originally in #11382).
This commit is contained in:
Edward Chen 2023-02-21 14:08:03 -08:00 committed by GitHub
parent 973aaf110b
commit 755161100a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -190,7 +190,9 @@ asm(".linker_option \"-framework\", \"CoreML\"");
compiled_model_path_ = [compileUrl path];
MLModelConfiguration* config = [MLModelConfiguration alloc];
config.computeUnits = MLComputeUnitsAll;
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
_model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
if (error != NULL) {
@ -216,7 +218,6 @@ asm(".linker_option \"-framework\", \"CoreML\"");
}
MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
options.usesCPUOnly = coreml_flags_ & COREML_FLAG_USE_CPU_ONLY;
NSError* error = nil;
id<MLFeatureProvider> output_feature = [_model predictionFromFeatures:input_feature
options:options
@ -268,7 +269,7 @@ asm(".linker_option \"-framework\", \"CoreML\"");
break;
}
// For this case, since Coreml Spec only uses int32 for model output while onnx provides
// int64 for model output data type. We are doing a type casting (int32 -> int64) here
// int64 for model output data type. We are doing a type casting (int32 -> int64) here
// when copying the model to ORT
case ONNX_NAMESPACE::TensorProto_DataType_INT64:
if (model_output_type == MLMultiArrayDataTypeInt32) {
@ -313,9 +314,11 @@ class Execution {
};
Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) {
execution_ = [[CoreMLExecution alloc] initWithPath:path
logger:logger
coreml_flags:coreml_flags];
@autoreleasepool {
execution_ = [[CoreMLExecution alloc] initWithPath:path
logger:logger
coreml_flags:coreml_flags];
}
}
Status Execution::LoadModel() {
@ -324,7 +327,10 @@ Status Execution::LoadModel() {
}
if (HAS_VALID_BASE_OS_VERSION) {
auto status = [execution_ loadModel];
Status status{};
@autoreleasepool {
status = [execution_ loadModel];
}
model_loaded = status.IsOK();
return status;
}
@ -337,7 +343,9 @@ Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>&
ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel");
if (HAS_VALID_BASE_OS_VERSION) {
return [execution_ predict:inputs outputs:outputs];
@autoreleasepool {
return [execution_ predict:inputs outputs:outputs];
}
}
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ ");
@ -371,4 +379,4 @@ const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const {
}
} // namespace coreml
} // namespace onnxruntime
} // namespace onnxruntime