From 500f18badbb36e30d40f6dab9c4bf0fe277c60b8 Mon Sep 17 00:00:00 2001
From: Yufeng Li
Date: Tue, 8 Jun 2021 14:01:06 -0700
Subject: [PATCH] fix bug that bias can not be shared across Convs (#7982)

---
 .../tools/quantization/onnx_quantizer.py      | 75 -------------------
 .../tools/quantization/operators/conv.py      | 58 ++++++++++----
 .../python/quantization/test_conv_dynamic.py  | 70 +++++++++++++++++
 3 files changed, 114 insertions(+), 89 deletions(-)
 create mode 100644 onnxruntime/test/python/quantization/test_conv_dynamic.py

diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py
index 778357b8d5..f0a9def76f 100644
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -461,81 +461,6 @@ class ONNXQuantizer:
         self.quantized_value_map[input_name] = QuantizedValue(input_name, output_name, scale_name, zp_name, qType)
         return nodes + [qlinear_node]
 
-    def get_bias_add_nodes(self, nodes, node, last_output, quantized_bias_name):
-        '''
-        Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
-            parameter nodes: new nodes would be appended into nodes
-            parameter node: current node (Conv)
-            parameter last_output: output of previous node (input to bias add)
-            return: the name of output
-        '''
-        # Add tensors for the shape to be reshaped to
-        weight = find_by_name(node.input[1], self.model.initializer())
-        if weight is None:
-            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
-
-        # Add reshape for correct broadcase
-        reshape_input_data = quantized_bias_name
-        reshape_input_shape = quantized_bias_name + "_reshape_shape"
-        reshape_input = [reshape_input_data, reshape_input_shape]
-
-        reshape_shape = np.ones((len(weight.dims)), dtype=np.int64)
-        reshape_shape[1] = -1
-        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
-                                             reshape_shape)
-        self.model.add_initializer(init_shape)
-
-        reshape_op_output = node.output[0] + "_reshape"
-        reshape_node = onnx.helper.make_node("Reshape", reshape_input, [reshape_op_output],
-                                             quantized_bias_name + "reshape")
-        nodes.append(reshape_node)
-
-        # Add an Add operation for bias
-        bias_add_input = [last_output]
-        bias_add_input.append(reshape_op_output)
-        add_node_output = node.output[0] + "_bias_add"
-        add_node = onnx.helper.make_node("Add", bias_add_input, [add_node_output], quantized_bias_name + "bias_add")
-        nodes.append(add_node)
-        return add_node_output
-
-    def quantize_bias_dynamic(self, bias_name, input_name, weight_name, new_node_list):
-        '''
-        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
-        '''
-
-        # get scale for weight
-        weight_scale_name = self.quantized_value_map[weight_name].scale_name
-        weight_initializer = find_by_name(weight_scale_name, self.model.initializer())
-        weight_scale = self.tensor_proto_to_array(weight_initializer)
-
-        # get bias
-        bias_initializer = find_by_name(bias_name, self.model.initializer())
-        bias_data = self.tensor_proto_to_array(bias_initializer)
-        quantized_bias_name = bias_name + "_quantized"
-
-        qType = onnx_proto.TensorProto.INT32
-
-        input_scale_name = input_name + "_scale"
-        bias_scale_node = onnx.helper.make_node("Mul", [input_scale_name, weight_scale_name], [bias_name + "_scale"],
-                                                bias_name + "_scale_node")
-        new_node_list.append(bias_scale_node)
-
-        quantize_bias_node = onnx.helper.make_node("Div", [bias_name, bias_scale_node.output[0]],
-                                                   [bias_name + "_tmp_quant:0"], bias_name + "_tmp_qaunt")
-        new_node_list.append(quantize_bias_node)
-
-        bias_rounded_node = onnx.helper.make_node("Floor", quantize_bias_node.output, [bias_name + "_quant_rounded:0"],
-                                                  bias_name + "_quant_rounded")
-        new_node_list.append(bias_rounded_node)
-
-        bias_cast_node = onnx.helper.make_node("Cast",
-                                               bias_rounded_node.output, [quantized_bias_name],
-                                               quantized_bias_name + "_node",
-                                               to=qType)
-        new_node_list.append(bias_cast_node)
-
-        return quantized_bias_name
-
     def quantize_bias_static(self, bias_name, input_name, weight_name):
         '''
         Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py
index b9793aa238..4999d61d6a 100644
--- a/onnxruntime/python/tools/quantization/operators/conv.py
+++ b/onnxruntime/python/tools/quantization/operators/conv.py
@@ -1,4 +1,5 @@
 import onnx
+import numpy as np
 from .base_operator import QuantOperatorBase
 from .qdq_base_operator import QDQOperatorBase
 from ..quant_utils import find_by_name, get_mul_node, QuantizedValue, QuantizedValueType, attribute_to_kwarg, BiasToQuantize
@@ -9,6 +10,42 @@ class ConvInteger(QuantOperatorBase):
     def __init__(self, onnx_quantizer, onnx_node):
         super().__init__(onnx_quantizer, onnx_node)
 
+    def add_bias(self, nodes, scaled_output):
+        '''
+        Add the float bias of the Conv back onto the scaled ConvInteger output by appending a "Reshape"
+        node on the bias and an "Add" node whose output is the original Conv output (node.output[0]).
+        The Conv node (self.node), its output and its bias (node.input[2]) are read from the operator itself.
+            parameter nodes: list that the new Reshape and Add nodes are appended to
+            parameter scaled_output: name of the scaled ConvInteger output, i.e. the quant conv result without bias
+            raise ValueError: if the Conv weight (node.input[1]) is not an initializer
+            return: None; the nodes list is extended in place
+        '''
+        node = self.node
+        model = self.quantizer.model
+        # Add tensors for the shape to be reshaped to
+        weight = find_by_name(node.input[1], model.initializer())
+        if weight is None:
+            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
+
+        # Add reshape for correct broadcast
+        output = node.output[0]
+        reshape_input_data = node.input[2]  # bias of Conv
+        reshape_input_shape = output + "_bias_reshape_shape"
+        reshape_output = output + "_bias_reshape_output"
+
+        shape = np.ones((len(weight.dims)), dtype=np.int64)
+        shape[1] = -1
+        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
+                                             shape)
+        model.add_initializer(init_shape)
+
+        reshape_node = onnx.helper.make_node("Reshape", [reshape_input_data, reshape_input_shape], [reshape_output])
+        nodes.append(reshape_node)
+
+        # Add an Add operation for bias
+        add_node = onnx.helper.make_node("Add", [scaled_output, reshape_output], [output], output + "_bias_add")
+        nodes.append(add_node)
+
     def quantize(self):
         node = self.node
         assert (node.op_type == "Conv")
@@ -16,14 +53,6 @@ class ConvInteger(QuantOperatorBase):
         (quantized_input_names, zero_point_names, scale_names, nodes) = \
             self.quantizer.quantize_inputs(node, [0, 1])
 
-        # quantize bias if exist
-        quantized_bias_name = ""
-        bias_present = False
-        if len(node.input) == 3:
-            quantized_bias_name = self.quantizer.quantize_bias_dynamic(node.input[2], node.input[0], node.input[1],
-                                                                       nodes)
-            bias_present = True
-
         conv_integer_output = node.output[0] + "_output_quantized"
         conv_integer_name = node.name + "_quant" if node.name != "" else ""
 
@@ -34,11 +63,6 @@ class ConvInteger(QuantOperatorBase):
                                                   [conv_integer_output], conv_integer_name, **kwargs)
         nodes.append(conv_integer_node)
 
-        # Add bias add nodes
-        if bias_present:
-            conv_integer_output = self.quantizer.get_bias_add_nodes(nodes, node, conv_integer_output,
-                                                                    quantized_bias_name)
-
         # Add cast operation to cast convInteger output to float.
         cast_op_output = conv_integer_output + "_cast_output"
         cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output],
@@ -60,10 +84,16 @@ class ConvInteger(QuantOperatorBase):
 
         scales_mul_op_output = scales_mul_node.output[0]
 
+        has_bias = len(node.input) == 3
+        scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"
+
         # Add mul operation to multiply mul_scales_op result with output of ConvInteger
         # and make the output of this node the same as output of original conv node.
         output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
-        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], node.output[0], output_scale_mul_op))
+        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], scaled_output_name, output_scale_mul_op))
+
+        if has_bias:
+            self.add_bias(nodes, scaled_output_name)
 
         self.quantizer.new_nodes += nodes
 
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
new file mode 100644
index 0000000000..a95ac9b50f
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# coding: utf-8
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+import onnx
+import onnxruntime
+import numpy as np
+from onnx import helper, TensorProto, numpy_helper
+from onnxruntime.quantization import quantize_dynamic
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
+
+
+def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
+    '''
+    Helper function to generate initializers for test inputs
+    '''
+    tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
+    init = numpy_helper.from_array(tensor, input_name)
+    return init
+
+class TestONNXModel(unittest.TestCase):
+    def construct_model(self, model_path):
+        #      input
+        #     /    |
+        #    /     |
+        # Conv(1)  |
+        #   |      |
+        #  Relu  Conv(2)
+        #   |      |
+        #    \     /
+        #      Add
+        #       |
+        #    (output)
+        initializers = []
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [4, 2, 8, 8])
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [4, 2, 8, 8])
+
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W1'))
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W2'))
+        initializers.append(generate_input_initializer([2], np.float32, 'B'))
+        conv_node_1 = onnx.helper.make_node('Conv', ['input', 'W1', 'B'], ['Conv1_O'], name='Conv1')
+        conv_node_2 = onnx.helper.make_node('Conv', ['input', 'W2', 'B'], ['Conv2_O'], name='Conv2')
+        relu_node = onnx.helper.make_node('Relu', ['Conv1_O'], ['Relu_O'], name='Relu')
+        add_node = onnx.helper.make_node('Add', ['Relu_O', 'Conv2_O'], ['output'], name='Add')
+        graph = helper.make_graph([conv_node_1, relu_node, conv_node_2, add_node],
+                                  'onnx_model_test', [input], [output], initializer=initializers)
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        onnx.save(model, model_path)
+
+    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
+        quantize_dynamic(model_fp32_path, model_int8_path)
+        quant_nodes = {'ConvInteger' : 2}
+        check_op_type_count(self, model_int8_path, **quant_nodes)
+        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
+
+    def test_quant_conv(self):
+        np.random.seed(1)
+        model_fp32_path = 'conv_bias.fp32.onnx'
+        model_int8_path = 'conv_bias.quant.onnx'
+        self.construct_model(model_fp32_path)
+
+        self.dynamic_quant_conv(model_fp32_path, model_int8_path)
+
+if __name__ == '__main__':
+    unittest.main()