Refactor parts of calibrate.py. Add QLinearAdd and QLinearMul support. Fix a bug when loading JPEGs that are not strictly RGB, and typos in the load_batch call. (#3542)

This commit is contained in:
Zhang Lei 2020-04-18 17:10:55 -07:00 committed by GitHub
parent db9566f70d
commit c365822808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 15 deletions

View file

@ -21,21 +21,24 @@ import re
import subprocess
import json
def augment_graph(model):
def augment_graph(model, quantization_candidates=['Conv', 'MatMul'], black_nodes=[], white_nodes=[]):
'''
Adds ReduceMin and ReduceMax nodes to all Conv and MatMul nodes in
Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
model and ensures their outputs are stored as part of the graph output
parameter model: loaded FP32 ONNX model to quantize
parameter quantization_candidates: node op types for nodes to be quantized.
Calibration will be done for them.
parameter black_nodes: nodes with these names will be forcibly ignored by this
calibration augmentation, no matter what their op type is.
parameter white_nodes: nodes with these names will be forced to be calibration augmented.
return: augmented ONNX model
'''
# Candidate nodes for quantization. Calibration will be done for these nodes only
# When more nodes are extended to support quantization, add them to this list
quantization_candidates = ['Conv', 'MatMul']
added_nodes = []
added_outputs = []
for node in model.graph.node:
if node.op_type in quantization_candidates:
should_be_calibrate = ((node.op_type in quantization_candidates) and (node.name not in black_nodes)) or (node.name in white_nodes)
if should_be_calibrate:
input_name = node.output[0]
# Adding ReduceMin nodes
reduce_min_name = ''
@ -217,6 +220,15 @@ def main():
parser = argparse.ArgumentParser(description='parsing model and test data set paths')
parser.add_argument('--model_path', required=True)
parser.add_argument('--dataset_path', required=True)
parser.add_argument('--force_fusions', default=False, action='store_true')
parser.add_argument('--op_types', type=str, default='Conv,MatMul',
help='comma delimited operator types to be calibrated and quantized')
parser.add_argument('--black_nodes', type=str, default='',
help='comma delimited operator names that should not be quantized')
parser.add_argument('--white_nodes', type=str, default='',
help='comma delimited operator names force to be quantized')
parser.add_argument('--augmented_model_path', type=str, default = 'augmented_model.onnx',
help='save augmented model to this file for verification purpose')
parser.add_argument('--output_model_path', type=str, default='calibrated_quantized_model.onnx')
parser.add_argument('--dataset_size',
type=int,
@ -228,6 +240,9 @@ def main():
choices=['preprocess_method1', 'preprocess_method2', 'None'],
help="Refer to Readme.md for guidance on choosing this option.")
args = parser.parse_args()
calibrate_op_types = args.op_types.split(',')
black_nodes = args.black_nodes.split(',')
white_nodes = args.white_nodes.split(',')
model_path = args.model_path
output_model_path = args.output_model_path
images_folder = args.dataset_path
@ -235,25 +250,25 @@ def main():
size_limit = args.dataset_size
# Generating augmented ONNX model
augmented_model_path = 'augmented_model.onnx'
model = onnx.load(model_path)
augmented_model = augment_graph(model)
onnx.save(augmented_model, augmented_model_path)
augmented_model = augment_graph(model, calibrate_op_types, black_nodes, white_nodes)
onnx.save(augmented_model, args.augmented_model_path)
# Conducting inference
session = onnxruntime.InferenceSession(augmented_model_path, None)
session = onnxruntime.InferenceSession(args.augmented_model_path, None)
(samples, channels, height, width) = session.get_inputs()[0].shape
# Generating inputs for quantization
if args.data_preprocess == "None":
inputs = load_pb_file(images_folder, args.dataset_size, samples, channels, height, width)
else:
inputs = load_batch(images_folder, height, width, size_limit, args.data_preprocess)
inputs = load_batch(images_folder, height, width, args.data_preprocess, size_limit)
print(inputs.shape)
dict_for_quantization = get_intermediate_outputs(model_path, session, inputs, calib_mode)
quantization_params_dict = calculate_quantization_params(model, quantization_thresholds=dict_for_quantization)
calibrated_quantized_model = quantize(onnx.load(model_path),
quantization_mode=QuantizationMode.QLinearOps,
force_fusions=args.force_fusions,
quantization_params=quantization_params_dict)
onnx.save(calibrated_quantized_model, output_model_path)

View file

@ -30,7 +30,8 @@ def preprocess_method1(image_filepath, height, width):
parameter width: image width in pixels
return: matrix characterizing image
'''
pillow_img = Image.open(image_filepath).resize((width, height))
pillow_img = Image.new("RGB", (width, height))
pillow_img.paste(Image.open(image_filepath).resize((width, height)))
input_data = np.float32(pillow_img) / 127.5 - 1.0 # normalization
input_data -= np.mean(input_data) # normalization
nhwc_data = np.expand_dims(input_data, axis=0)
@ -47,7 +48,8 @@ def preprocess_method2(image_filepath, height, width):
parameter width: image width in pixels
return: matrix characterizing image
'''
pillow_img = Image.open(image_filepath).resize((width, height))
pillow_img = Image.new("RGB", (width, height))
pillow_img.paste(Image.open(image_filepath).resize((width, height)))
input_data = np.float32(pillow_img) - \
np.array([123.68, 116.78, 103.94], dtype=np.float32)
nhwc_data = np.expand_dims(input_data, axis=0)

View file

@ -15,6 +15,7 @@ from onnx import shape_inference
__producer__ = "onnx.quantize"
__version__ = "0.1.0"
onnx_domain = "ai.onnx"
ms_domain = "com.microsoft"
onnx_op_set_version = 11
type_to_name = {
@ -314,6 +315,8 @@ class ONNXQuantizer:
new_list += self._quantize_matmul(node, new_list)
elif node.op_type == 'Gather' and self._is_valid_quantize_value(node.input[0]):
new_list += self._quantize_gather_ops(node, new_list)
elif node.op_type == 'Add' or node.op_type == 'Mul':
new_list += self._quantize_binary_math_ops(node, new_list)
elif node.op_type == 'Relu' or node.op_type == 'Clip':
new_list += self._handle_activation_ops(node, new_list)
else:
@ -875,7 +878,6 @@ class ONNXQuantizer:
List of scale names used for input quantization,
List of new QuantizeLinear nodes created)
'''
assert (node.op_type == "Conv" or node.op_type == "MatMul" or node.op_type == "Gather")
quantized_input_names = []
zero_point_names = []
@ -1018,6 +1020,61 @@ class ONNXQuantizer:
return []
def _quantize_binary_math_ops(self, node, new_nodes_list):
    '''
    Turn a two-input math node (Add, Mul, ...) into its "QLinear" + op_type form.

    The conversion only happens when self.mode is QuantizationMode.QLinearOps and
    quantization parameters are available for the node's first output; otherwise
    handling is delegated to self._handle_other_ops.
        parameter node: Current binary math node
        parameter new_nodes_list: List of new nodes created before processing current node
        return: List of nodes in topological order that represents quantized binary math node
    '''
    # Guard 1: this rewrite is specific to QLinearOps mode.
    if self.mode is not QuantizationMode.QLinearOps:
        return self._handle_other_ops(node, new_nodes_list)

    # Guard 2: without output quantization parameters the node is not quantized here.
    found, out_scale, out_zero_point, _, _ = self._get_quantization_params(node.output[0])
    if not found:
        return self._handle_other_ops(node, new_nodes_list)

    # Quantize both operands; result_nodes is the list the new node is appended to.
    quantized_inputs, zero_points, scales, result_nodes = \
        self._quantize_inputs(node, [0, 1], new_nodes_list)

    original_output = node.output[0]
    quantized_output = original_output + "_quantized"
    quantized_name = node.name + "_quant" if node.name != "" else ""

    # Carry over the original attributes and place the new node in ms_domain.
    attrs = {}
    for attr in node.attribute:
        attrs.update(_attribute_to_kwarg(attr))
    attrs["domain"] = ms_domain

    # Input layout: (value, scale, zero point) for operand 0, then operand 1,
    # followed by the output scale and output zero point.
    operands = []
    for idx in (0, 1):
        operands += [quantized_inputs[idx], scales[idx], zero_points[idx]]
    operands += [out_scale, out_zero_point]

    quantized_node = onnx.helper.make_node(
        "QLinear" + node.op_type, operands,
        [quantized_output], quantized_name, **attrs)
    result_nodes.append(quantized_node)

    # Record the mapping from the original output name to its quantized value.
    self.quantized_value_map[original_output] = QuantizedValue(
        original_output, quantized_output, out_scale,
        out_zero_point, QuantizedValueType.Input)

    return result_nodes
def _quantize_gather_ops(self, node, new_nodes_list):
assert (node.op_type == "Gather")
(quantized_input_names, zero_point_names, scale_names, nodes) = \