fix bug that bias can not be shared across Convs (#7982)

2026-07-13 18:08:13 +00:00 · 2021-06-08 14:01:06 -07:00 · 2021-06-08 14:01:06 -07:00 · 500f18badb
commit 500f18badb
parent 66170bfcef
3 changed files with 114 additions and 89 deletions
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@ -461,81 +461,6 @@ class ONNXQuantizer:
        self.quantized_value_map[input_name] = QuantizedValue(input_name, output_name, scale_name, zp_name, qType)
        return nodes + [qlinear_node]

-    def get_bias_add_nodes(self, nodes, node, last_output, quantized_bias_name):
-        '''
-        Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
-            parameter nodes: new nodes would be appended into nodes
-            parameter node: current node (Conv)
-            parameter last_output: output of previous node (input to bias add)
-            return: the name of output
-        '''
-        # Add tensors for the shape to be reshaped to
-        weight = find_by_name(node.input[1], self.model.initializer())
-        if weight is None:
-            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
-
-        # Add reshape for correct broadcase
-        reshape_input_data = quantized_bias_name
-        reshape_input_shape = quantized_bias_name + "_reshape_shape"
-        reshape_input = [reshape_input_data, reshape_input_shape]
-
-        reshape_shape = np.ones((len(weight.dims)), dtype=np.int64)
-        reshape_shape[1] = -1
-        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
-                                             reshape_shape)
-        self.model.add_initializer(init_shape)
-
-        reshape_op_output = node.output[0] + "_reshape"
-        reshape_node = onnx.helper.make_node("Reshape", reshape_input, [reshape_op_output],
-                                             quantized_bias_name + "reshape")
-        nodes.append(reshape_node)
-
-        # Add an Add operation for bias
-        bias_add_input = [last_output]
-        bias_add_input.append(reshape_op_output)
-        add_node_output = node.output[0] + "_bias_add"
-        add_node = onnx.helper.make_node("Add", bias_add_input, [add_node_output], quantized_bias_name + "bias_add")
-        nodes.append(add_node)
-        return add_node_output
-
-    def quantize_bias_dynamic(self, bias_name, input_name, weight_name, new_node_list):
-        '''
-        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
-        '''
-
-        # get scale for weight
-        weight_scale_name = self.quantized_value_map[weight_name].scale_name
-        weight_initializer = find_by_name(weight_scale_name, self.model.initializer())
-        weight_scale = self.tensor_proto_to_array(weight_initializer)
-
-        # get bias
-        bias_initializer = find_by_name(bias_name, self.model.initializer())
-        bias_data = self.tensor_proto_to_array(bias_initializer)
-        quantized_bias_name = bias_name + "_quantized"
-
-        qType = onnx_proto.TensorProto.INT32
-
-        input_scale_name = input_name + "_scale"
-        bias_scale_node = onnx.helper.make_node("Mul", [input_scale_name, weight_scale_name], [bias_name + "_scale"],
-                                                bias_name + "_scale_node")
-        new_node_list.append(bias_scale_node)
-
-        quantize_bias_node = onnx.helper.make_node("Div", [bias_name, bias_scale_node.output[0]],
-                                                   [bias_name + "_tmp_quant:0"], bias_name + "_tmp_qaunt")
-        new_node_list.append(quantize_bias_node)
-
-        bias_rounded_node = onnx.helper.make_node("Floor", quantize_bias_node.output, [bias_name + "_quant_rounded:0"],
-                                                  bias_name + "_quant_rounded")
-        new_node_list.append(bias_rounded_node)
-
-        bias_cast_node = onnx.helper.make_node("Cast",
-                                               bias_rounded_node.output, [quantized_bias_name],
-                                               quantized_bias_name + "_node",
-                                               to=qType)
-        new_node_list.append(bias_cast_node)
-
-        return quantized_bias_name
-
    def quantize_bias_static(self, bias_name, input_name, weight_name):
        '''
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
--- a/onnxruntime/python/tools/quantization/operators/conv.py
+++ b/onnxruntime/python/tools/quantization/operators/conv.py
@ -1,4 +1,5 @@
 import onnx
+import numpy as np
 from .base_operator import QuantOperatorBase
 from .qdq_base_operator import QDQOperatorBase
 from ..quant_utils import find_by_name, get_mul_node, QuantizedValue, QuantizedValueType, attribute_to_kwarg, BiasToQuantize
@ -9,6 +10,42 @@ class ConvInteger(QuantOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

+    def add_bias(self, nodes, scaled_output):
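+        '''
+        Add the Conv bias to the scaled ConvInteger result by appending a "Reshape" node on the bias and an "Add" node.
+        The Conv node is taken from self.node: node.input[1] is the weight (must be an initializer), node.input[2] the bias.
+            parameter nodes: list of new nodes; the Reshape and Add nodes are appended to it
+            parameter scaled_output: name of the output of the quantized conv without bias (first input of the Add)
+        The bias is reshaped to [1, -1, 1, ...] (same rank as the weight) so that it broadcasts along axis 1.
+        The Add node writes to node.output[0], i.e. the output name of the original Conv.
+            return: None (raises ValueError if the weight is not an initializer)
+        '''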
+        node = self.node
+        model = self.quantizer.model
+        # Add tensors for the shape to be reshaped to
+        weight = find_by_name(node.input[1], model.initializer())
+        if weight is None:
+            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
+
+        # Add reshape for correct broadcast
+        output = node.output[0]
+        reshape_input_data = node.input[2] # bias of Conv
+        reshape_input_shape = output + "_bias_reshape_shape"
+        reshape_output = output + "_bias_reshape_output"
+
+        shape = np.ones((len(weight.dims)), dtype=np.int64)
+        shape[1] = -1
+        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
+                                             shape)
+        model.add_initializer(init_shape)
+
+        reshape_node = onnx.helper.make_node("Reshape", [reshape_input_data, reshape_input_shape], [reshape_output])
+        nodes.append(reshape_node)
+
+        # Add an Add operation for bias
+        add_node = onnx.helper.make_node("Add", [scaled_output, reshape_output], [output], output + "_bias_add")
+        nodes.append(add_node)
+
    def quantize(self):
        node = self.node
        assert (node.op_type == "Conv")
@ -16,14 +53,6 @@ class ConvInteger(QuantOperatorBase):
        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1])

-        # quantize bias if exist
-        quantized_bias_name = ""
-        bias_present = False
-        if len(node.input) == 3:
-            quantized_bias_name = self.quantizer.quantize_bias_dynamic(node.input[2], node.input[0], node.input[1],
-                                                                       nodes)
-            bias_present = True
-
        conv_integer_output = node.output[0] + "_output_quantized"
        conv_integer_name = node.name + "_quant" if node.name != "" else ""

@ -34,11 +63,6 @@ class ConvInteger(QuantOperatorBase):
                                                  [conv_integer_output], conv_integer_name, **kwargs)
        nodes.append(conv_integer_node)

-        # Add bias add nodes
-        if bias_present:
-            conv_integer_output = self.quantizer.get_bias_add_nodes(nodes, node, conv_integer_output,
-                                                                    quantized_bias_name)
-
        # Add cast operation to cast convInteger output to float.
        cast_op_output = conv_integer_output + "_cast_output"
        cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output],
@ -60,10 +84,16 @@ class ConvInteger(QuantOperatorBase):

        scales_mul_op_output = scales_mul_node.output[0]

+        has_bias = len(node.input) == 3
+        scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"
+
        # Add mul operation to multiply mul_scales_op result with output of ConvInteger
        # and make the output of this node the same as output of original conv node.
        output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
-        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], node.output[0], output_scale_mul_op))
+        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], scaled_output_name, output_scale_mul_op))
+
+        if has_bias:
+            self.add_bias(nodes, scaled_output_name)

        self.quantizer.new_nodes += nodes

--- a/onnxruntime/test/python/quantization/test_conv_dynamic.py
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# coding: utf-8
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+import onnx
+import onnxruntime
+import numpy as np
+from onnx import helper, TensorProto, numpy_helper
+from onnxruntime.quantization import quantize_dynamic
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
+
+
+def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
+  '''
+  Helper function to generate initializers for test inputs
+  '''
+  tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
+  init = numpy_helper.from_array(tensor, input_name)
+  return init
+
+class TestONNXModel(unittest.TestCase):
+    def construct_model(self, model_path):
+        #       input
+        #      /    |
+        #     /     |
+        #  Conv(1)  |
+        #     |     |
+        #    Relu  Conv(2)
+        #     |     |
+        #     \     /
+        #       Add
+        #        |
+        #       (output)
+        initializers = []
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [4, 2, 8, 8])
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [4, 2, 8, 8])
+
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W1'))
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W2'))
+        initializers.append(generate_input_initializer([2], np.float32, 'B'))
+        conv_node_1 = onnx.helper.make_node('Conv', ['input', 'W1', 'B'], ['Conv1_O'], name='Conv1')
+        conv_node_2 = onnx.helper.make_node('Conv', ['input', 'W2', 'B'], ['Conv2_O'], name='Conv2')
+        relu_node = onnx.helper.make_node('Relu', ['Conv1_O'], ['Relu_O'], name='Relu')
+        add_node = onnx.helper.make_node('Add', ['Relu_O', 'Conv2_O'], ['output'], name='Add')
+        graph = helper.make_graph([conv_node_1, relu_node, conv_node_2, add_node],
+                                  'onnx_model_test', [input], [output], initializer=initializers)
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        onnx.save(model, model_path)
+
+    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
+        quantize_dynamic(model_fp32_path, model_int8_path)
+        quant_nodes = {'ConvInteger' : 2}
+        check_op_type_count(self, model_int8_path, **quant_nodes)
+        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
+
+    def test_quant_conv(self):
+        np.random.seed(1)
+        model_fp32_path = 'conv_bias.fp32.onnx'
+        model_int8_path = 'conv_bias.quant.onnx'
+        self.construct_model(model_fp32_path)
+
+        self.dynamic_quant_conv(model_fp32_path, model_int8_path)
+
+if __name__ == '__main__':
+    unittest.main()