Fix bug where a bias cannot be shared across Convs (#7982)

This commit is contained in:
Yufeng Li 2021-06-08 14:01:06 -07:00 committed by GitHub
parent 66170bfcef
commit 500f18badb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 114 additions and 89 deletions

View file

@ -461,81 +461,6 @@ class ONNXQuantizer:
self.quantized_value_map[input_name] = QuantizedValue(input_name, output_name, scale_name, zp_name, qType)
return nodes + [qlinear_node]
def get_bias_add_nodes(self, nodes, node, last_output, quantized_bias_name):
    '''
    Append a "Reshape" node on the quantized bias and an "Add" node that adds it to last_output.
    parameter nodes: list that receives the newly created Reshape and Add nodes
    parameter node: current node (Conv); its input[1] must name a weight initializer
    parameter last_output: output of previous node (first input of the Add)
    parameter quantized_bias_name: name of the quantized bias tensor (data input of the Reshape)
    return: the name of the Add node's output
    raises ValueError: if node.input[1] is not found among the model initializers
    '''
    weight_name = node.input[1]
    weight = find_by_name(weight_name, self.model.initializer())
    if weight is None:
        raise ValueError("Expected {} to be an initializer".format(weight_name))

    # The reshape target has the same rank as the weight: ones everywhere
    # except -1 at index 1, so the bias broadcasts correctly in the Add.
    rank = len(weight.dims)
    target_shape = np.ones((rank), dtype=np.int64)
    target_shape[1] = -1
    shape_name = quantized_bias_name + "_reshape_shape"
    self.model.add_initializer(
        onnx.helper.make_tensor(shape_name, onnx_proto.TensorProto.INT64, [rank], target_shape))

    # Reshape the bias using the shape initializer registered above.
    reshaped_bias = node.output[0] + "_reshape"
    nodes.append(
        onnx.helper.make_node("Reshape", [quantized_bias_name, shape_name], [reshaped_bias],
                              quantized_bias_name + "reshape"))

    # Add the reshaped bias to the previous output.
    bias_added = node.output[0] + "_bias_add"
    nodes.append(
        onnx.helper.make_node("Add", [last_output, reshaped_bias], [bias_added],
                              quantized_bias_name + "bias_add"))
    return bias_added
def quantize_bias_dynamic(self, bias_name, input_name, weight_name, new_node_list):
    '''
    Emit the nodes that quantize the bias inside the graph:
    Mul(input_scale, weight_scale) -> Div(bias, scale) -> Floor -> Cast to INT32.
    Zero Point == 0 and Scale == Input_Scale * Weight_Scale
    parameter bias_name: name of the bias initializer
    parameter input_name: name of the input; its scale tensor is referenced as input_name + "_scale"
    parameter weight_name: name of the weight; must have an entry in quantized_value_map
    parameter new_node_list: list that receives the four new nodes
    return: the name of the quantized bias tensor (bias_name + "_quantized")
    '''
    initializers = self.model.initializer

    # Weight scale lookup. The converted array is not used below; the call is
    # kept so the sequence of lookups stays as it was.
    weight_scale_name = self.quantized_value_map[weight_name].scale_name
    weight_scale = self.tensor_proto_to_array(find_by_name(weight_scale_name, initializers()))

    # Bias lookup. As above, the converted array is not used below.
    bias_data = self.tensor_proto_to_array(find_by_name(bias_name, initializers()))

    quantized_bias_name = bias_name + "_quantized"
    bias_scale_output = bias_name + "_scale"

    # bias_scale = input_scale * weight_scale
    scale_mul = onnx.helper.make_node("Mul", [input_name + "_scale", weight_scale_name], [bias_scale_output],
                                      bias_name + "_scale_node")
    new_node_list.append(scale_mul)

    # bias / bias_scale
    scaled_bias = onnx.helper.make_node("Div", [bias_name, scale_mul.output[0]], [bias_name + "_tmp_quant:0"],
                                        bias_name + "_tmp_qaunt")
    new_node_list.append(scaled_bias)

    # Round down to whole numbers before the integer cast.
    floored_bias = onnx.helper.make_node("Floor", scaled_bias.output, [bias_name + "_quant_rounded:0"],
                                         bias_name + "_quant_rounded")
    new_node_list.append(floored_bias)

    # Cast to INT32; the Cast output is the quantized bias tensor.
    to_int32 = onnx.helper.make_node("Cast",
                                     floored_bias.output, [quantized_bias_name],
                                     quantized_bias_name + "_node",
                                     to=onnx_proto.TensorProto.INT32)
    new_node_list.append(to_int32)

    return quantized_bias_name
def quantize_bias_static(self, bias_name, input_name, weight_name):
'''
Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale

View file

@ -1,4 +1,5 @@
import onnx
import numpy as np
from .base_operator import QuantOperatorBase
from .qdq_base_operator import QDQOperatorBase
from ..quant_utils import find_by_name, get_mul_node, QuantizedValue, QuantizedValueType, attribute_to_kwarg, BiasToQuantize
@ -9,6 +10,42 @@ class ConvInteger(QuantOperatorBase):
def __init__(self, onnx_quantizer, onnx_node):
super().__init__(onnx_quantizer, onnx_node)
def add_bias(self, nodes, scaled_output):
    '''
    Append a "Reshape" node on the Conv bias and an "Add" node that adds it to scaled_output.
    Every tensor name created here is derived from the Conv output name, not from the bias name.
    parameter nodes: new nodes would be appended into nodes
    parameter scaled_output: output of quant conv without bias
    return: nothing; the Add node writes to the original Conv output name
    raises ValueError: if the Conv weight (input[1]) is not found among the model initializers
    '''
    conv = self.node
    graph_model = self.quantizer.model

    # The weight is needed only for its rank, which sizes the reshape target.
    weight_name = conv.input[1]
    weight = find_by_name(weight_name, graph_model.initializer())
    if weight is None:
        raise ValueError("Expected {} to be an initializer".format(weight_name))

    conv_output = conv.output[0]
    bias_name = conv.input[2]  # bias of Conv
    shape_name = conv_output + "_bias_reshape_shape"
    reshaped_bias = conv_output + "_bias_reshape_output"

    # Reshape target: ones everywhere except -1 at index 1, for correct broadcast in the Add.
    rank = len(weight.dims)
    target_shape = np.ones((rank), dtype=np.int64)
    target_shape[1] = -1
    graph_model.add_initializer(
        onnx.helper.make_tensor(shape_name, onnx_proto.TensorProto.INT64, [rank], target_shape))

    nodes.append(onnx.helper.make_node("Reshape", [bias_name, shape_name], [reshaped_bias]))

    # Add the reshaped bias; the result takes the name of the original Conv output.
    nodes.append(
        onnx.helper.make_node("Add", [scaled_output, reshaped_bias], [conv_output], conv_output + "_bias_add"))
def quantize(self):
node = self.node
assert (node.op_type == "Conv")
@ -16,14 +53,6 @@ class ConvInteger(QuantOperatorBase):
(quantized_input_names, zero_point_names, scale_names, nodes) = \
self.quantizer.quantize_inputs(node, [0, 1])
# quantize bias if exist
quantized_bias_name = ""
bias_present = False
if len(node.input) == 3:
quantized_bias_name = self.quantizer.quantize_bias_dynamic(node.input[2], node.input[0], node.input[1],
nodes)
bias_present = True
conv_integer_output = node.output[0] + "_output_quantized"
conv_integer_name = node.name + "_quant" if node.name != "" else ""
@ -34,11 +63,6 @@ class ConvInteger(QuantOperatorBase):
[conv_integer_output], conv_integer_name, **kwargs)
nodes.append(conv_integer_node)
# Add bias add nodes
if bias_present:
conv_integer_output = self.quantizer.get_bias_add_nodes(nodes, node, conv_integer_output,
quantized_bias_name)
# Add cast operation to cast convInteger output to float.
cast_op_output = conv_integer_output + "_cast_output"
cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output],
@ -60,10 +84,16 @@ class ConvInteger(QuantOperatorBase):
scales_mul_op_output = scales_mul_node.output[0]
has_bias = len(node.input) == 3
scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"
# Add mul operation to multiply mul_scales_op result with output of ConvInteger
# and make the output of this node the same as output of original conv node.
output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], node.output[0], output_scale_mul_op))
nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], scaled_output_name, output_scale_mul_op))
if has_bias:
self.add_bias(nodes, scaled_output_name)
self.quantizer.new_nodes += nodes

View file

@ -0,0 +1,70 @@
#!/usr/bin/env python
# coding: utf-8
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import unittest
import onnx
import onnxruntime
import numpy as np
from onnx import helper, TensorProto, numpy_helper
from onnxruntime.quantization import quantize_dynamic
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
    '''
    Helper function to generate initializers for test inputs.
    Values are drawn from a normal distribution (mean 0, standard deviation 0.3),
    converted to tensor_dtype and wrapped as a tensor named input_name.
    '''
    random_values = np.random.normal(0, 0.3, tensor_shape)
    return numpy_helper.from_array(random_values.astype(tensor_dtype), input_name)
class TestONNXModel(unittest.TestCase):
    '''
    Dynamic quantization test for a model in which two Conv nodes share one bias initializer.
    '''

    def construct_model(self, model_path):
        '''
        Build the float model below and save it to model_path.
        Both Conv nodes read the same bias initializer 'B'.
        '''
        #      input
        #     /    |
        #    /     |
        # Conv(1)  |
        #   |      |
        #  Relu  Conv(2)
        #   |      |
        #    \     /
        #      Add
        #       |
        #    (output)
        io_shape = [4, 2, 8, 8]
        graph_input = helper.make_tensor_value_info('input', TensorProto.FLOAT, io_shape)
        graph_output = helper.make_tensor_value_info('output', TensorProto.FLOAT, io_shape)

        # Generated in the order W1, W2, B so the random draws keep their sequence.
        initializers = [
            generate_input_initializer(shape, np.float32, name)
            for name, shape in (('W1', [2, 2, 1, 1]), ('W2', [2, 2, 1, 1]), ('B', [2]))
        ]

        conv_1 = onnx.helper.make_node('Conv', ['input', 'W1', 'B'], ['Conv1_O'], name='Conv1')
        conv_2 = onnx.helper.make_node('Conv', ['input', 'W2', 'B'], ['Conv2_O'], name='Conv2')
        relu = onnx.helper.make_node('Relu', ['Conv1_O'], ['Relu_O'], name='Relu')
        add = onnx.helper.make_node('Add', ['Relu_O', 'Conv2_O'], ['output'], name='Add')

        graph = helper.make_graph([conv_1, relu, conv_2, add],
                                  'onnx_model_test', [graph_input], [graph_output],
                                  initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
        onnx.save(model, model_path)

    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
        '''
        Quantize the float model dynamically, then check that the result contains
        two ConvInteger nodes and passes the correctness check against the float model.
        '''
        quantize_dynamic(model_fp32_path, model_int8_path)
        check_op_type_count(self, model_int8_path, ConvInteger=2)

        # Drawn only after quantization, as in the original call sequence.
        feed = {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)}
        check_model_correctness(self, model_fp32_path, model_int8_path, feed)

    def test_quant_conv(self):
        '''
        End-to-end: build the shared-bias model, quantize it and verify the result.
        '''
        np.random.seed(1)  # fixed seed so weights and inputs are reproducible
        fp32_path = 'conv_bias.fp32.onnx'
        int8_path = 'conv_bias.quant.onnx'
        self.construct_model(fp32_path)
        self.dynamic_quant_conv(fp32_path, int8_path)
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()