mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
* Add initial helper for optimizing a QDQ format model for usage with ORT. If a DQ node has multiple consumers it will end up in multiple QDQ node units. This is complicated to handle, as each QDQ node unit could end up being handled by a different execution provider. By duplicating the DQ node we simplify this logic. Generally the duplicate nodes will disappear when the QDQ node unit is converted to a single node with a quantized operator. If there are QDQ node units that cannot be converted to use a quantized operator, the pending ORT cleanup that drops remaining Q->DQ pairs between fp32 nodes can remove any remaining DQ nodes. * Fix pep8 warning Co-authored-by: Guoyu Wang <wanggy@outlook.com>
34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
|
|
import argparse
|
|
import onnx
|
|
import os
|
|
import pathlib
|
|
|
|
from .qdq_model_utils import fix_dq_nodes_with_multiple_consumers
|
|
|
|
|
|
def optimize_qdq_model():
    """Command-line entry point: optimize a QDQ format ONNX model for ONNX Runtime.

    Parses two positional command-line arguments (input model path, output
    model path), loads the input model, applies the QDQ fix-up utilities,
    and writes the updated model to the output path.

    Raises:
        FileNotFoundError: if the input model path does not exist
            (via ``Path.resolve(strict=True)``).
    """
    # Use pathlib consistently (rather than os.path.basename) for the program
    # name shown in usage/help output; the resulting string is identical.
    parser = argparse.ArgumentParser(pathlib.Path(__file__).name,
                                     description='''
Update a QDQ format ONNX model to ensure optimal performance when executed using
ONNX Runtime.
''')

    parser.add_argument('input_model', type=pathlib.Path, help='Provide path to ONNX model to update.')
    parser.add_argument('output_model', type=pathlib.Path, help='Provide path to write updated ONNX model to.')

    args = parser.parse_args()

    # strict=True makes a missing input file fail fast with FileNotFoundError
    # before any work is done.
    model = onnx.load(str(args.input_model.resolve(strict=True)))

    # there's just one utility to run currently but we expect that will grow
    fix_dq_nodes_with_multiple_consumers(model)

    onnx.save(model, str(args.output_model.resolve()))
|
|
|
|
|
|
# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    optimize_qdq_model()