mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-07 00:13:17 +00:00
Refactor calibrate.py. Add QLinearAdd and QLinearMul support. Fix bugs when loading JPGs that are not strictly RGB, and typos in the load_batch call. (#3542)
This commit is contained in:
parent
db9566f70d
commit
c365822808
3 changed files with 89 additions and 15 deletions
|
|
@ -21,21 +21,24 @@ import re
|
|||
import subprocess
|
||||
import json
|
||||
|
||||
|
||||
def augment_graph(model):
|
||||
def augment_graph(model, quantization_candidates=['Conv', 'MatMul'], black_nodes=[], white_nodes=[]):
|
||||
'''
|
||||
Adds ReduceMin and ReduceMax nodes to all Conv and MatMul nodes in
|
||||
Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
|
||||
model and ensures their outputs are stored as part of the graph output
|
||||
parameter model: loaded FP32 ONNX model to quantize
|
||||
parameter quantization_candidates: node op types for nodes to be quantized.
|
||||
Calibration will be done for them.
|
||||
parameter black_nodes: nodes with these names will be forcibly ignored by this
|
||||
calibration augmentation, no matter what their op type is.
|
||||
parameter white_nodes: nodes with these names will be forced to be calibration augmented.
|
||||
return: augmented ONNX model
|
||||
'''
|
||||
# Candidate nodes for quantization. Calibration will be done for these nodes only
|
||||
# When more nodes are extended to support quantization, add them to this list
|
||||
quantization_candidates = ['Conv', 'MatMul']
|
||||
|
||||
added_nodes = []
|
||||
added_outputs = []
|
||||
for node in model.graph.node:
|
||||
if node.op_type in quantization_candidates:
|
||||
should_be_calibrate = ((node.op_type in quantization_candidates) and (node.name not in black_nodes)) or (node.name in white_nodes)
|
||||
if should_be_calibrate:
|
||||
input_name = node.output[0]
|
||||
# Adding ReduceMin nodes
|
||||
reduce_min_name = ''
|
||||
|
|
@ -217,6 +220,15 @@ def main():
|
|||
parser = argparse.ArgumentParser(description='parsing model and test data set paths')
|
||||
parser.add_argument('--model_path', required=True)
|
||||
parser.add_argument('--dataset_path', required=True)
|
||||
parser.add_argument('--force_fusions', default=False, action='store_true')
|
||||
parser.add_argument('--op_types', type=str, default='Conv,MatMul',
|
||||
help='comma delimited operator types to be calibrated and quantized')
|
||||
parser.add_argument('--black_nodes', type=str, default='',
|
||||
help='comma delimited operator names that should not be quantized')
|
||||
parser.add_argument('--white_nodes', type=str, default='',
|
||||
help='comma delimited operator names force to be quantized')
|
||||
parser.add_argument('--augmented_model_path', type=str, default = 'augmented_model.onnx',
|
||||
help='save augmented model to this file for verification purpose')
|
||||
parser.add_argument('--output_model_path', type=str, default='calibrated_quantized_model.onnx')
|
||||
parser.add_argument('--dataset_size',
|
||||
type=int,
|
||||
|
|
@ -228,6 +240,9 @@ def main():
|
|||
choices=['preprocess_method1', 'preprocess_method2', 'None'],
|
||||
help="Refer to Readme.md for guidance on choosing this option.")
|
||||
args = parser.parse_args()
|
||||
calibrate_op_types = args.op_types.split(',')
|
||||
black_nodes = args.black_nodes.split(',')
|
||||
white_nodes = args.white_nodes.split(',')
|
||||
model_path = args.model_path
|
||||
output_model_path = args.output_model_path
|
||||
images_folder = args.dataset_path
|
||||
|
|
@ -235,25 +250,25 @@ def main():
|
|||
size_limit = args.dataset_size
|
||||
|
||||
# Generating augmented ONNX model
|
||||
augmented_model_path = 'augmented_model.onnx'
|
||||
model = onnx.load(model_path)
|
||||
augmented_model = augment_graph(model)
|
||||
onnx.save(augmented_model, augmented_model_path)
|
||||
augmented_model = augment_graph(model, calibrate_op_types, black_nodes, white_nodes)
|
||||
onnx.save(augmented_model, args.augmented_model_path)
|
||||
|
||||
# Conducting inference
|
||||
session = onnxruntime.InferenceSession(augmented_model_path, None)
|
||||
session = onnxruntime.InferenceSession(args.augmented_model_path, None)
|
||||
(samples, channels, height, width) = session.get_inputs()[0].shape
|
||||
|
||||
# Generating inputs for quantization
|
||||
if args.data_preprocess == "None":
|
||||
inputs = load_pb_file(images_folder, args.dataset_size, samples, channels, height, width)
|
||||
else:
|
||||
inputs = load_batch(images_folder, height, width, size_limit, args.data_preprocess)
|
||||
inputs = load_batch(images_folder, height, width, args.data_preprocess, size_limit)
|
||||
print(inputs.shape)
|
||||
dict_for_quantization = get_intermediate_outputs(model_path, session, inputs, calib_mode)
|
||||
quantization_params_dict = calculate_quantization_params(model, quantization_thresholds=dict_for_quantization)
|
||||
calibrated_quantized_model = quantize(onnx.load(model_path),
|
||||
quantization_mode=QuantizationMode.QLinearOps,
|
||||
force_fusions=args.force_fusions,
|
||||
quantization_params=quantization_params_dict)
|
||||
onnx.save(calibrated_quantized_model, output_model_path)
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@ def preprocess_method1(image_filepath, height, width):
|
|||
parameter width: image width in pixels
|
||||
return: matrix characterizing image
|
||||
'''
|
||||
pillow_img = Image.open(image_filepath).resize((width, height))
|
||||
pillow_img = Image.new("RGB", (width, height))
|
||||
pillow_img.paste(Image.open(image_filepath).resize((width, height)))
|
||||
input_data = np.float32(pillow_img) / 127.5 - 1.0 # normalization
|
||||
input_data -= np.mean(input_data) # normalization
|
||||
nhwc_data = np.expand_dims(input_data, axis=0)
|
||||
|
|
@ -47,7 +48,8 @@ def preprocess_method2(image_filepath, height, width):
|
|||
parameter width: image width in pixels
|
||||
return: matrix characterizing image
|
||||
'''
|
||||
pillow_img = Image.open(image_filepath).resize((width, height))
|
||||
pillow_img = Image.new("RGB", (width, height))
|
||||
pillow_img.paste(Image.open(image_filepath).resize((width, height)))
|
||||
input_data = np.float32(pillow_img) - \
|
||||
np.array([123.68, 116.78, 103.94], dtype=np.float32)
|
||||
nhwc_data = np.expand_dims(input_data, axis=0)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from onnx import shape_inference
|
|||
__producer__ = "onnx.quantize"
|
||||
__version__ = "0.1.0"
|
||||
onnx_domain = "ai.onnx"
|
||||
ms_domain = "com.microsoft"
|
||||
onnx_op_set_version = 11
|
||||
|
||||
type_to_name = {
|
||||
|
|
@ -314,6 +315,8 @@ class ONNXQuantizer:
|
|||
new_list += self._quantize_matmul(node, new_list)
|
||||
elif node.op_type == 'Gather' and self._is_valid_quantize_value(node.input[0]):
|
||||
new_list += self._quantize_gather_ops(node, new_list)
|
||||
elif node.op_type == 'Add' or node.op_type == 'Mul':
|
||||
new_list += self._quantize_binary_math_ops(node, new_list)
|
||||
elif node.op_type == 'Relu' or node.op_type == 'Clip':
|
||||
new_list += self._handle_activation_ops(node, new_list)
|
||||
else:
|
||||
|
|
@ -875,7 +878,6 @@ class ONNXQuantizer:
|
|||
List of scale names used for input quantization,
|
||||
List of new QuantizeLinear nodes created)
|
||||
'''
|
||||
assert (node.op_type == "Conv" or node.op_type == "MatMul" or node.op_type == "Gather")
|
||||
|
||||
quantized_input_names = []
|
||||
zero_point_names = []
|
||||
|
|
@ -1018,6 +1020,61 @@ class ONNXQuantizer:
|
|||
|
||||
return []
|
||||
|
||||
def _quantize_binary_math_ops(self, node, new_nodes_list):
    '''
    Turn a binary math node (Add, Mul, ...) into its "QLinear" + op_type
    counterpart (QLinearAdd, QLinearMul, ...).

    Only applies when self.mode is QuantizationMode.QLinearOps and quantization
    parameters are found for the node's first output; in every other case the
    node is handed to self._handle_other_ops unchanged.

        parameter node: Current binary math node
        parameter new_nodes_list: List of new nodes created before processing current node
        return: List of nodes in topological order that represents quantized binary math node
    '''
    # Guard: this rewrite is only defined for the QLinearOps mode.
    if self.mode is not QuantizationMode.QLinearOps:
        return self._handle_other_ops(node, new_nodes_list)

    output_name = node.output[0]

    # Guard: quantize only when parameters were supplied for the output.
    found, output_scale_name, output_zp_name, _, _ = \
        self._get_quantization_params(output_name)
    if not found:
        return self._handle_other_ops(node, new_nodes_list)

    # Quantize both operands; `nodes` collects the nodes created for them.
    (quantized_input_names, zero_point_names, scale_names, nodes) = \
        self._quantize_inputs(node, [0, 1], new_nodes_list)

    quantized_output_name = output_name + "_quantized"
    quantized_node_name = node.name + "_quant" if node.name != "" else ""

    # Carry over the original attributes and place the new op in ms_domain.
    node_kwargs = {}
    for attr in node.attribute:
        node_kwargs.update(_attribute_to_kwarg(attr))
    node_kwargs["domain"] = ms_domain

    # Input layout: (value, scale, zero point) for operand 0, then operand 1,
    # followed by the output scale and output zero point.
    quantized_node_inputs = [
        quantized_input_names[0],
        scale_names[0],
        zero_point_names[0],
        quantized_input_names[1],
        scale_names[1],
        zero_point_names[1],
        output_scale_name,
        output_zp_name,
    ]

    nodes.append(
        onnx.helper.make_node(
            "QLinear" + node.op_type,
            quantized_node_inputs,
            [quantized_output_name],
            quantized_node_name,
            **node_kwargs))

    # Create an entry for this quantized value
    self.quantized_value_map[output_name] = QuantizedValue(
        output_name, quantized_output_name, output_scale_name,
        output_zp_name, QuantizedValueType.Input)

    return nodes
|
||||
|
||||
def _quantize_gather_ops(self, node, new_nodes_list):
|
||||
assert (node.op_type == "Gather")
|
||||
(quantized_input_names, zero_point_names, scale_names, nodes) = \
|
||||
|
|
|
|||
Loading…
Reference in a new issue