Refactor calibrate.py. Add QLinearAdd and QLinearMul support. Fix bugs loading JPEGs that are not strict RGB, and fix typos in the load_batch call. (#3542)

2026-07-22 19:23:30 +00:00 · 2020-04-18 17:10:55 -07:00 · 2020-04-18 17:10:55 -07:00 · c365822808
commit c365822808
parent db9566f70d
3 changed files with 89 additions and 15 deletions
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@ -21,21 +21,24 @@ import re
 import subprocess
 import json

-
-def augment_graph(model):
+def augment_graph(model, quantization_candidates=['Conv', 'MatMul'], black_nodes=[], white_nodes=[]):
    '''
-    Adds ReduceMin and ReduceMax nodes to all Conv and MatMul nodes in
+    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
        parameter model: loaded FP32 ONNX model to quantize
+        parameter quantization_candidates: node op types for nodes to be quantized.
+                                           Calibration will be done for them.
+        parameter black_nodes: nodes with these names will be forcibly ignored by this
+                               calibration augmentation, no matter what their op type is.
+        parameter white_nodes: nodes with these names will be forced to be calibration augmented.
        return: augmented ONNX model
    '''
-    # Candidate nodes for quantization. Calibration will be done for these nodes only
-    # When more nodes are extended to support quantization, add them to this list
-    quantization_candidates = ['Conv', 'MatMul']
+
    added_nodes = []
    added_outputs = []
    for node in model.graph.node:
-        if node.op_type in quantization_candidates:
+        should_be_calibrate = ((node.op_type in quantization_candidates) and (node.name not in black_nodes)) or (node.name in white_nodes)
+        if should_be_calibrate:
            input_name = node.output[0]
            # Adding ReduceMin nodes
            reduce_min_name = ''
@ -217,6 +220,15 @@ def main():
    parser = argparse.ArgumentParser(description='parsing model and test data set paths')
    parser.add_argument('--model_path', required=True)
    parser.add_argument('--dataset_path', required=True)
+    parser.add_argument('--force_fusions', default=False, action='store_true')
+    parser.add_argument('--op_types', type=str, default='Conv,MatMul',
+                        help='comma delimited operator types to be calibrated and quantized')
+    parser.add_argument('--black_nodes', type=str, default='',
+                        help='comma delimited operator names that should not be quantized')
+    parser.add_argument('--white_nodes', type=str, default='',
+                        help='comma delimited operator names force to be quantized')
+    parser.add_argument('--augmented_model_path', type=str, default = 'augmented_model.onnx',
+                        help='save augmented model to this file for verification purpose')
    parser.add_argument('--output_model_path', type=str, default='calibrated_quantized_model.onnx')
    parser.add_argument('--dataset_size',
                        type=int,
@ -228,6 +240,9 @@ def main():
                        choices=['preprocess_method1', 'preprocess_method2', 'None'],
                        help="Refer to Readme.md for guidance on choosing this option.")
    args = parser.parse_args()
+    calibrate_op_types = args.op_types.split(',')
+    black_nodes = args.black_nodes.split(',')
+    white_nodes = args.white_nodes.split(',')
    model_path = args.model_path
    output_model_path = args.output_model_path
    images_folder = args.dataset_path
@ -235,25 +250,25 @@ def main():
    size_limit = args.dataset_size

    # Generating augmented ONNX model
-    augmented_model_path = 'augmented_model.onnx'
    model = onnx.load(model_path)
-    augmented_model = augment_graph(model)
-    onnx.save(augmented_model, augmented_model_path)
+    augmented_model = augment_graph(model, calibrate_op_types, black_nodes, white_nodes)
+    onnx.save(augmented_model, args.augmented_model_path)

    # Conducting inference
-    session = onnxruntime.InferenceSession(augmented_model_path, None)
+    session = onnxruntime.InferenceSession(args.augmented_model_path, None)
    (samples, channels, height, width) = session.get_inputs()[0].shape

    # Generating inputs for quantization
    if args.data_preprocess == "None":
        inputs = load_pb_file(images_folder, args.dataset_size, samples, channels, height, width)
    else:
-        inputs = load_batch(images_folder, height, width, size_limit, args.data_preprocess)
+        inputs = load_batch(images_folder, height, width, args.data_preprocess, size_limit)
    print(inputs.shape)
    dict_for_quantization = get_intermediate_outputs(model_path, session, inputs, calib_mode)
    quantization_params_dict = calculate_quantization_params(model, quantization_thresholds=dict_for_quantization)
    calibrated_quantized_model = quantize(onnx.load(model_path),
                                          quantization_mode=QuantizationMode.QLinearOps,
+                                          force_fusions=args.force_fusions,
                                          quantization_params=quantization_params_dict)
    onnx.save(calibrated_quantized_model, output_model_path)

--- a/onnxruntime/python/tools/quantization/data_preprocess.py
+++ b/onnxruntime/python/tools/quantization/data_preprocess.py
@ -30,7 +30,8 @@ def preprocess_method1(image_filepath, height, width):
        parameter width: image width in pixels
        return: matrix characterizing image
    '''
-    pillow_img = Image.open(image_filepath).resize((width, height))
+    pillow_img = Image.new("RGB", (width, height))
+    pillow_img.paste(Image.open(image_filepath).resize((width, height)))
    input_data = np.float32(pillow_img) / 127.5 - 1.0  # normalization
    input_data -= np.mean(input_data)  # normalization
    nhwc_data = np.expand_dims(input_data, axis=0)
@ -47,7 +48,8 @@ def preprocess_method2(image_filepath, height, width):
        parameter width: image width in pixels
        return: matrix characterizing image
    '''
-    pillow_img = Image.open(image_filepath).resize((width, height))
+    pillow_img = Image.new("RGB", (width, height))
+    pillow_img.paste(Image.open(image_filepath).resize((width, height)))
    input_data = np.float32(pillow_img) - \
        np.array([123.68, 116.78, 103.94], dtype=np.float32)
    nhwc_data = np.expand_dims(input_data, axis=0)
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@ -15,6 +15,7 @@ from onnx import shape_inference
 __producer__ = "onnx.quantize"
 __version__ = "0.1.0"
 onnx_domain = "ai.onnx"
+ms_domain = "com.microsoft"
 onnx_op_set_version = 11

 type_to_name = {
@ -314,6 +315,8 @@ class ONNXQuantizer:
                    new_list += self._quantize_matmul(node, new_list)
                elif node.op_type == 'Gather' and self._is_valid_quantize_value(node.input[0]):
                    new_list += self._quantize_gather_ops(node, new_list)
+                elif node.op_type == 'Add' or node.op_type == 'Mul':
+                    new_list += self._quantize_binary_math_ops(node, new_list)
                elif node.op_type == 'Relu' or node.op_type == 'Clip':
                    new_list += self._handle_activation_ops(node, new_list)
                else:
@ -875,7 +878,6 @@ class ONNXQuantizer:
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        '''
-        assert (node.op_type == "Conv" or node.op_type == "MatMul" or node.op_type == "Gather")

        quantized_input_names = []
        zero_point_names = []
@ -1018,6 +1020,61 @@ class ONNXQuantizer:

        return []

+    def _quantize_binary_math_ops(self, node, new_nodes_list):
+        '''
+        Used when self.mode is QuantizationMode.QLinearOps.
+        Quantize the given binary math op, like Add, Mul, etc, to QLinearAdd, QLinearMul...
+
+            parameter node: Current binary math node
+            parameter new_nodes_list: List of new nodes created before processing current node
+            return: List of nodes in topological order that represents quantized binary math node
+        '''
+        if self.mode is not QuantizationMode.QLinearOps:
+            return self._handle_other_ops(node, new_nodes_list)
+
+        data_found, output_scale_name, output_zp_name, _, _ = \
+            self._get_quantization_params(node.output[0])
+        if (not data_found): # only try to quantize when given quantization parameters for it
+            return self._handle_other_ops(node, new_nodes_list)
+
+        (quantized_input_names, zero_point_names, scale_names, nodes) = \
+            self._quantize_inputs(node, [0, 1], new_nodes_list)
+
+        qlinear_binary_math_output = node.output[0] + "_quantized"
+        qlinear_binary_math_name = ""
+        if node.name != "":
+            qlinear_binary_math_name = node.name + "_quant"
+        kwargs = {}
+        for attribute in node.attribute:
+            kwargs.update(_attribute_to_kwarg(attribute))
+        kwargs["domain"]=ms_domain
+
+        qlinear_binary_math_inputs = []
+        # Input 0
+        qlinear_binary_math_inputs.append(quantized_input_names[0])
+        qlinear_binary_math_inputs.append(scale_names[0])
+        qlinear_binary_math_inputs.append(zero_point_names[0])
+        # Input 1
+        qlinear_binary_math_inputs.append(quantized_input_names[1])
+        qlinear_binary_math_inputs.append(scale_names[1])
+        qlinear_binary_math_inputs.append(zero_point_names[1])
+
+        # Output
+        qlinear_binary_math_inputs.append(output_scale_name)
+        qlinear_binary_math_inputs.append(output_zp_name)
+
+        qlinear_binary_math_node = onnx.helper.make_node(
+            "QLinear" + node.op_type, qlinear_binary_math_inputs,
+            [qlinear_binary_math_output], qlinear_binary_math_name, **kwargs)
+        nodes.append(qlinear_binary_math_node)
+
+        # Create an entry for this quantized value
+        q_output = QuantizedValue(node.output[0], qlinear_binary_math_output, output_scale_name,
+                                  output_zp_name, QuantizedValueType.Input)
+        self.quantized_value_map[node.output[0]] = q_output
+
+        return nodes
+
    def _quantize_gather_ops(self, node, new_nodes_list):
        assert (node.op_type == "Gather")
        (quantized_input_names, zero_point_names, scale_names, nodes) = \