mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
fix bug that bias can not be shared across Convs (#7982)
This commit is contained in:
parent
66170bfcef
commit
500f18badb
3 changed files with 114 additions and 89 deletions
|
|
@ -461,81 +461,6 @@ class ONNXQuantizer:
|
|||
self.quantized_value_map[input_name] = QuantizedValue(input_name, output_name, scale_name, zp_name, qType)
|
||||
return nodes + [qlinear_node]
|
||||
|
||||
def get_bias_add_nodes(self, nodes, node, last_output, quantized_bias_name):
    '''
    Apply a quantized bias to last_output by appending a "Reshape" node on the bias and an "Add" node.

    The bias is reshaped to a tensor of the same rank as the Conv weight in which every dimension
    is 1 except dimension 1, which is -1, so that the following Add broadcasts it over last_output.

    Every tensor and node created here is named after node.output[0] instead of after the bias.
    Naming them after the bias made two Convs that share one bias register the same shape
    initializer and the same node names twice.

        parameter nodes: list that the new Reshape and Add nodes are appended to
        parameter node: current node (Conv)
        parameter last_output: output of previous node (input to bias add)
        parameter quantized_bias_name: name of the quantized bias tensor fed to the Reshape
        return: the name of the output of the Add node
        raises ValueError: if find_by_name returns None for node.input[1] (the Conv weight)
    '''
    weight = find_by_name(node.input[1], self.model.initializer())
    if weight is None:
        raise ValueError("Expected {} to be an initializer".format(node.input[1]))

    conv_output = node.output[0]
    rank = len(weight.dims)

    # Target shape for the Reshape: all ones, with -1 in dimension 1 for correct broadcast.
    reshape_shape = np.ones((rank), dtype=np.int64)
    reshape_shape[1] = -1
    reshape_input_shape = conv_output + "_bias_reshape_shape"
    init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [rank],
                                         reshape_shape)
    self.model.add_initializer(init_shape)

    # Reshape the (possibly shared) quantized bias into a per-Conv output tensor.
    reshape_op_output = conv_output + "_reshape"
    reshape_node = onnx.helper.make_node("Reshape", [quantized_bias_name, reshape_input_shape],
                                         [reshape_op_output], conv_output + "_bias_reshape")
    nodes.append(reshape_node)

    # Add the reshaped bias to the output of the previous node.
    add_node_output = conv_output + "_bias_add"
    add_node = onnx.helper.make_node("Add", [last_output, reshape_op_output], [add_node_output],
                                     conv_output + "_bias_add_node")
    nodes.append(add_node)

    return add_node_output
|
||||
|
||||
def quantize_bias_dynamic(self, bias_name, input_name, weight_name, new_node_list):
    '''
    Append the nodes that quantize a bias inside the graph and return the quantized tensor name.

    Zero Point == 0 and Scale == Input_Scale * Weight_Scale. The appended chain is
    Mul (input scale * weight scale) -> Div (bias / scale) -> Floor -> Cast to INT32.

    NOTE(review): every node name and output name below is derived from bias_name only, so
    calling this twice with the same bias_name appends nodes with identical output names.

        parameter bias_name: name of the bias tensor to quantize
        parameter input_name: name of the Conv input; its scale is read from input_name + "_scale"
        parameter weight_name: name of the weight; must be a key of self.quantized_value_map
        parameter new_node_list: list that the created nodes are appended to
        return: bias_name + "_quantized", the output of the final Cast node
    '''
    # Weight scale lookup. The decoded array is not used further down; the call is kept so
    # that this method touches the same helpers, in the same order, as before.
    weight_scale_name = self.quantized_value_map[weight_name].scale_name
    self.tensor_proto_to_array(find_by_name(weight_scale_name, self.model.initializer()))

    # Same for the bias initializer: decoded but not used by the nodes emitted below.
    self.tensor_proto_to_array(find_by_name(bias_name, self.model.initializer()))

    def emit(op_type, inputs, output, name, **attributes):
        # Create one node, append it to the caller's list and hand back its output name.
        new_node_list.append(onnx.helper.make_node(op_type, inputs, [output], name, **attributes))
        return output

    quantized_bias_name = bias_name + "_quantized"

    # bias scale = input scale * weight scale
    bias_scale = emit("Mul", [input_name + "_scale", weight_scale_name], bias_name + "_scale",
                      bias_name + "_scale_node")
    # bias / bias scale
    scaled_bias = emit("Div", [bias_name, bias_scale], bias_name + "_tmp_quant:0", bias_name + "_tmp_qaunt")
    # Floor the scaled value before the integer cast.
    rounded_bias = emit("Floor", [scaled_bias], bias_name + "_quant_rounded:0", bias_name + "_quant_rounded")
    # Cast to the quantized bias type.
    emit("Cast", [rounded_bias], quantized_bias_name, quantized_bias_name + "_node",
         to=onnx_proto.TensorProto.INT32)

    return quantized_bias_name
|
||||
|
||||
def quantize_bias_static(self, bias_name, input_name, weight_name):
|
||||
'''
|
||||
Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import onnx
|
||||
import numpy as np
|
||||
from .base_operator import QuantOperatorBase
|
||||
from .qdq_base_operator import QDQOperatorBase
|
||||
from ..quant_utils import find_by_name, get_mul_node, QuantizedValue, QuantizedValueType, attribute_to_kwarg, BiasToQuantize
|
||||
|
|
@ -9,6 +10,42 @@ class ConvInteger(QuantOperatorBase):
|
|||
def __init__(self, onnx_quantizer, onnx_node):
|
||||
super().__init__(onnx_quantizer, onnx_node)
|
||||
|
||||
def add_bias(self, nodes, scaled_output):
    '''
    Add the Conv bias to scaled_output by appending a "Reshape" node on the bias and an "Add" node.

    The bias (self.node.input[2]) is reshaped to the rank of the Conv weight with every dimension
    1 except dimension 1, which is -1, so that the Add broadcasts it. The Add writes to the
    original output of the Conv, self.node.output[0]. The shape initializer, the Reshape output
    and both node names are derived from that output name, so they are distinct for each Conv
    even when several Convs read the same bias tensor.

        parameter nodes: list that the new Reshape and Add nodes are appended to
        parameter scaled_output: output of quant conv without bias
        return: None; the result is produced under the name self.node.output[0]
        raises ValueError: if find_by_name returns None for the Conv weight (self.node.input[1])
    '''
    node = self.node
    model = self.quantizer.model

    weight = find_by_name(node.input[1], model.initializer())
    if weight is None:
        raise ValueError("Expected {} to be an initializer".format(node.input[1]))

    output = node.output[0]
    bias_name = node.input[2]  # bias of Conv
    rank = len(weight.dims)

    # Target shape for the Reshape: all ones, with -1 in dimension 1 for correct broadcast.
    reshape_input_shape = output + "_bias_reshape_shape"
    shape = np.ones((rank), dtype=np.int64)
    shape[1] = -1
    init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [rank], shape)
    model.add_initializer(init_shape)

    # Named like the Add node below so that every node created here can be identified.
    reshape_output = output + "_bias_reshape_output"
    reshape_node = onnx.helper.make_node("Reshape", [bias_name, reshape_input_shape], [reshape_output],
                                         output + "_bias_reshape")
    nodes.append(reshape_node)

    # Add the reshaped bias and write to the original output name of the Conv.
    add_node = onnx.helper.make_node("Add", [scaled_output, reshape_output], [output], output + "_bias_add")
    nodes.append(add_node)
|
||||
|
||||
def quantize(self):
|
||||
node = self.node
|
||||
assert (node.op_type == "Conv")
|
||||
|
|
@ -16,14 +53,6 @@ class ConvInteger(QuantOperatorBase):
|
|||
(quantized_input_names, zero_point_names, scale_names, nodes) = \
|
||||
self.quantizer.quantize_inputs(node, [0, 1])
|
||||
|
||||
# quantize bias if exist
|
||||
quantized_bias_name = ""
|
||||
bias_present = False
|
||||
if len(node.input) == 3:
|
||||
quantized_bias_name = self.quantizer.quantize_bias_dynamic(node.input[2], node.input[0], node.input[1],
|
||||
nodes)
|
||||
bias_present = True
|
||||
|
||||
conv_integer_output = node.output[0] + "_output_quantized"
|
||||
conv_integer_name = node.name + "_quant" if node.name != "" else ""
|
||||
|
||||
|
|
@ -34,11 +63,6 @@ class ConvInteger(QuantOperatorBase):
|
|||
[conv_integer_output], conv_integer_name, **kwargs)
|
||||
nodes.append(conv_integer_node)
|
||||
|
||||
# Add bias add nodes
|
||||
if bias_present:
|
||||
conv_integer_output = self.quantizer.get_bias_add_nodes(nodes, node, conv_integer_output,
|
||||
quantized_bias_name)
|
||||
|
||||
# Add cast operation to cast convInteger output to float.
|
||||
cast_op_output = conv_integer_output + "_cast_output"
|
||||
cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output],
|
||||
|
|
@ -60,10 +84,16 @@ class ConvInteger(QuantOperatorBase):
|
|||
|
||||
scales_mul_op_output = scales_mul_node.output[0]
|
||||
|
||||
has_bias = len(node.input) == 3
|
||||
scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"
|
||||
|
||||
# Add mul operation to multiply mul_scales_op result with output of ConvInteger
|
||||
# and make the output of this node the same as output of original conv node.
|
||||
output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
|
||||
nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], node.output[0], output_scale_mul_op))
|
||||
nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], scaled_output_name, output_scale_mul_op))
|
||||
|
||||
if has_bias:
|
||||
self.add_bias(nodes, scaled_output_name)
|
||||
|
||||
self.quantizer.new_nodes += nodes
|
||||
|
||||
|
|
|
|||
70
onnxruntime/test/python/quantization/test_conv_dynamic.py
Normal file
70
onnxruntime/test/python/quantization/test_conv_dynamic.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import unittest
|
||||
import onnx
|
||||
import onnxruntime
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto, numpy_helper
|
||||
from onnxruntime.quantization import quantize_dynamic
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
|
||||
|
||||
|
||||
def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
    '''
    Build a named initializer filled with normally distributed values (mean 0, std 0.3).

    The values are drawn with np.random.normal for tensor_shape, converted to tensor_dtype and
    wrapped with numpy_helper.from_array under the name input_name.
    '''
    values = np.random.normal(0, 0.3, tensor_shape)
    return numpy_helper.from_array(values.astype(tensor_dtype), input_name)
|
||||
|
||||
class TestONNXModel(unittest.TestCase):
    '''Dynamic quantization test for two Conv nodes that read the same bias initializer.'''

    def construct_model(self, model_path):
        '''Build the float model drawn below and save it to model_path.'''
        #       input
        #      /    |
        #     /     |
        #  Conv(1)  |
        #     |     |
        #    Relu  Conv(2)
        #     |     |
        #      \   /
        #       Add
        #        |
        #     (output)
        io_shape = [4, 2, 8, 8]
        graph_input = helper.make_tensor_value_info('input', TensorProto.FLOAT, io_shape)
        graph_output = helper.make_tensor_value_info('output', TensorProto.FLOAT, io_shape)

        # Generated in the order W1, W2, B; both Convs take 'B' as their bias.
        initializer_specs = (('W1', [2, 2, 1, 1]), ('W2', [2, 2, 1, 1]), ('B', [2]))
        initializers = [generate_input_initializer(shape, np.float32, name)
                        for name, shape in initializer_specs]

        graph_nodes = [
            onnx.helper.make_node('Conv', ['input', 'W1', 'B'], ['Conv1_O'], name='Conv1'),
            onnx.helper.make_node('Relu', ['Conv1_O'], ['Relu_O'], name='Relu'),
            onnx.helper.make_node('Conv', ['input', 'W2', 'B'], ['Conv2_O'], name='Conv2'),
            onnx.helper.make_node('Add', ['Relu_O', 'Conv2_O'], ['output'], name='Add'),
        ]

        graph = helper.make_graph(graph_nodes, 'onnx_model_test', [graph_input], [graph_output],
                                  initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
        onnx.save(model, model_path)

    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
        '''Quantize the model dynamically, then check the op count and the numerical output.'''
        quantize_dynamic(model_fp32_path, model_int8_path)

        # Both Conv nodes are expected to become ConvInteger.
        check_op_type_count(self, model_int8_path, ConvInteger=2)

        feeds = {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)}
        check_model_correctness(self, model_fp32_path, model_int8_path, feeds)

    def test_quant_conv(self):
        '''Seed numpy, build the shared-bias model and run the dynamic quantization checks.'''
        np.random.seed(1)
        fp32_path = 'conv_bias.fp32.onnx'
        int8_path = 'conv_bias.quant.onnx'
        self.construct_model(fp32_path)
        self.dynamic_quant_conv(fp32_path, int8_path)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Reference in a new issue