onnxruntime/tools/python/convert_onnx_models_to_ort.py
Scott McKay c84bb9df9f
Add ability to track per operator types in reduced build config. (#6428)
* Add ability to generate configuration that includes required types for individual operators, to allow build size reduction based on that.
  - Add python bindings for ORT format models
    - Add script to update bindings and help info
  - Add parsing of ORT format models
  - Add ability to enable type reduction to config generation
  - Update build.py to only allow operator/type reduction via config
    - simpler to require config to be generated first
    - can't mix a type aware (ORT format model only) and non-type aware config as that may result in insufficient types being enabled
  - Add script to create reduced build config
  - Update CIs
2021-01-29 07:59:51 +10:00

145 lines
7.8 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import argparse
import glob
import os
import re
import tempfile
import onnxruntime as ort
def _create_config_file_from_ort_models(optimized_model_path, enable_type_reduction: bool):
    """Write `required_operators.config` into `optimized_model_path`.

    The file content is produced by `util.ort_format_model.create_config_from_models`, which is handed the
    model directory, the path of the config file to create, and the type reduction flag.

    :param optimized_model_path: Directory holding the models. The config file is created inside it.
    :param enable_type_reduction: Forwarded unchanged to `create_config_from_models`.
    """
    target_config = os.path.join(optimized_model_path, 'required_operators.config')
    announcement = "Creating configuration file for operators required by ORT format models in {}."
    print(announcement.format(target_config))

    # Imported at call time, after the message above has been printed, exactly as the script has always done.
    # NOTE(review): presumably deferred so the module can be loaded without `util` on the path - confirm.
    from util.ort_format_model import create_config_from_models as build_config
    build_config(optimized_model_path, target_config, enable_type_reduction)
def _convert(model_path: str, optimization_level: ort.GraphOptimizationLevel, use_nnapi: bool):
    """Convert every ONNX model found under `model_path` to an ORT format model.

    `model_path` is searched recursively for `*.onnx` files. Each model is first optimized and saved as an
    intermediate ONNX model in a temporary directory. That intermediate model is then loaded again with the
    session configured to save in ORT format, producing a `.ort` file next to the original model.

    :param model_path: Directory to search for ONNX models.
    :param optimization_level: Graph optimization level to apply when optimizing and converting.
    :param use_nnapi: If True, register the NNAPI Execution Provider (ahead of the CPU one) when creating the
                      ORT format model.
    :raises ValueError: If no `.onnx` files are found under `model_path`.
    """
    models = glob.glob(os.path.join(model_path, '**', '*.onnx'), recursive=True)

    if not models:
        raise ValueError("No .onnx files were found in " + model_path)

    # The optimized ONNX models are only an intermediate step (they are the input used to create the ORT format
    # models below), so write them to a temporary directory that is removed when we are done.
    with tempfile.TemporaryDirectory() as tmpdirname:
        for model in models:
            # Replace the extension with os.path.splitext instead of a case-sensitive regex on '.onnx$'.
            # glob can match case-insensitively on Windows, so for a file such as MODEL.ONNX the regex would
            # not match, the target path would equal the source path, and the original model would be
            # overwritten by the ORT format output.
            model_root = os.path.splitext(model)[0]
            model_basename = os.path.basename(model_root)

            # create .optimized.onnx file in temp dir
            onnx_target_path = os.path.join(tmpdirname, model_basename + '.optimized.onnx')

            # create .ort file in same dir as original onnx model
            ort_target_path = model_root + '.ort'

            so = ort.SessionOptions()
            so.optimized_model_filepath = onnx_target_path
            so.graph_optimization_level = optimization_level

            print("Optimizing ONNX model {}".format(model))

            # creating the session will result in the optimized model being saved. we use just the CPU EP for this step
            providers = ['CPUExecutionProvider']
            _ = ort.InferenceSession(model, sess_options=so, providers=providers)

            # special case if we're enabling a compiling EP like NNAPI. we need an ONNX model that is only
            # optimized to 'basic' level to ensure all the nodes that NNAPI may take still exist.
            # we must use this model when creating the ORT format model to maximize the nodes that NNAPI can
            # potentially take, so replace onnx_target_path with the new path.
            higher_than_basic = (ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
                                 ort.GraphOptimizationLevel.ORT_ENABLE_ALL)
            if use_nnapi and optimization_level in higher_than_basic:
                onnx_target_path = os.path.join(tmpdirname, model_basename + '.optimized.basic.onnx')
                so.optimized_model_filepath = onnx_target_path
                so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
                _ = ort.InferenceSession(model, sess_options=so, providers=providers)

            # Second, convert optimized ONNX model to ORT format
            # we enable the compiling EPs when we generate the ORT format model so that we preserve the nodes it may
            # take, but allow optimization on any others
            if use_nnapi:
                # providers are priority based, so register NNAPI first
                providers.insert(0, 'NnapiExecutionProvider')

            so.optimized_model_filepath = ort_target_path
            # Use original optimization level so that if NNAPI is enabled we optimize nodes it is not taking
            so.graph_optimization_level = optimization_level
            so.add_session_config_entry('session.save_model_format', 'ORT')

            print("Converting optimized ONNX model to ORT format model {}".format(ort_target_path))
            _ = ort.InferenceSession(onnx_target_path, sess_options=so, providers=providers)
def _get_optimization_level(level):
    """Translate an optimization level name into the matching `ort.GraphOptimizationLevel` member.

    A warning is printed when 'all' is requested.

    :param level: One of 'disable', 'basic', 'extended' or 'all'.
    :return: The corresponding `ort.GraphOptimizationLevel` member.
    :raises ValueError: If `level` is not one of the names above.
    """
    # (level name, GraphOptimizationLevel member name), checked in this order
    known_levels = (
        ('disable', 'ORT_DISABLE_ALL'),
        # Constant folding and other optimizations that only use ONNX operators
        ('basic', 'ORT_ENABLE_BASIC'),
        # Optimizations using custom operators, excluding NCHWc optimizations
        ('extended', 'ORT_ENABLE_EXTENDED'),
        # all optimizations, including NCHWc (which has hardware specific logic)
        ('all', 'ORT_ENABLE_ALL'),
    )

    for name, member in known_levels:
        if level == name:
            if name == 'all':
                print('WARNING: Enabling layout optimizations is not recommended unless the ORT format model '
                      'will be executed on the same hardware used to create the model.')
            return getattr(ort.GraphOptimizationLevel, member)

    raise ValueError('Invalid optimization level of ' + level)
def parse_args():
    """Define the command line interface of this script and parse the arguments given to it.

    :return: The `argparse.Namespace` from `parse_args()`, with the attributes `use_nnapi`,
             `optimization_level`, `enable_type_reduction` and `model_path`.
    """
    script_name = os.path.basename(__file__)
    summary = (
        'Convert the ONNX format model/s in the provided directory to ORT format models.\n'
        'All files with a `.onnx` extension will be processed. For each one, an ORT format model will be '
        'created in the\n'
        'same directory. A configuration file will also be created called `required_operators.config`, '
        'and will contain\n'
        'the list of required operators for all converted models.\n'
        'This configuration file should be used as input to the minimal build via the '
        '`--include_ops_by_config`\n'
        'parameter.\n'
    )
    parser = argparse.ArgumentParser(script_name, description=summary)

    nnapi_help = ('Enable the NNAPI Execution Provider when creating models and determining required '
                  'operators. Note that this will limit the optimizations possible on nodes that the '
                  'NNAPI execution provider takes, in order to preserve those nodes in the ORT format '
                  'model.')
    parser.add_argument('--use_nnapi', action='store_true', help=nnapi_help)

    level_names = ['disable', 'basic', 'extended', 'all']
    level_help = ("Level to optimize ONNX model with, prior to converting to ORT format model. "
                  "These map to the onnxruntime.GraphOptimizationLevel values. "
                  "NOTE: It is NOT recommended to use 'all' unless you are creating the ORT format model on "
                  "the device you will run it on, as the generated model may not be valid on other hardware.")
    parser.add_argument('--optimization_level', default='extended', choices=level_names, help=level_help)

    type_reduction_help = ('Add operator specific type information to the configuration file to potentially '
                           'reduce the types supported by individual operator implementations.')
    parser.add_argument('--enable_type_reduction', action='store_true', help=type_reduction_help)

    model_path_help = ('Provide path to directory containing ONNX model/s to convert. '
                       'Files with .onnx extension will be processed.')
    parser.add_argument('model_path', help=model_path_help)

    return parser.parse_args()
def main():
    """Script entry point: parse the command line, convert the models, then write the operator config.

    The ONNX models under the `model_path` argument are converted to ORT format first. The configuration file
    is then created from that same directory.
    """
    cli = parse_args()
    level = _get_optimization_level(cli.optimization_level)

    _convert(cli.model_path, level, cli.use_nnapi)
    _create_config_file_from_ort_models(cli.model_path, cli.enable_type_reduction)
# Only run the conversion when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()