From 500f18badbb36e30d40f6dab9c4bf0fe277c60b8 Mon Sep 17 00:00:00 2001
From: Yufeng Li <liyufeng1987@gmail.com>
Date: Tue, 8 Jun 2021 14:01:06 -0700
Subject: [PATCH] fix bug that bias can not be shared across Convs (#7982)

---
 .../tools/quantization/onnx_quantizer.py      | 75 -------------------
 .../tools/quantization/operators/conv.py      | 58 ++++++++++----
 .../python/quantization/test_conv_dynamic.py  | 70 +++++++++++++++++
 3 files changed, 114 insertions(+), 89 deletions(-)
 create mode 100644 onnxruntime/test/python/quantization/test_conv_dynamic.py

diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py
index 778357b8d5..f0a9def76f 100644
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -461,81 +461,6 @@ class ONNXQuantizer:
         self.quantized_value_map[input_name] = QuantizedValue(input_name, output_name, scale_name, zp_name, qType)
         return nodes + [qlinear_node]
 
-    def get_bias_add_nodes(self, nodes, node, last_output, quantized_bias_name):
-        '''
-        Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
-            parameter nodes: new nodes would be appended into nodes
-            parameter node: current node (Conv)
-            parameter last_output: output of previous node (input to bias add)
-            return: the name of output
-        '''
-        # Add tensors for the shape to be reshaped to
-        weight = find_by_name(node.input[1], self.model.initializer())
-        if weight is None:
-            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
-
-        # Add reshape for correct broadcase
-        reshape_input_data = quantized_bias_name
-        reshape_input_shape = quantized_bias_name + "_reshape_shape"
-        reshape_input = [reshape_input_data, reshape_input_shape]
-
-        reshape_shape = np.ones((len(weight.dims)), dtype=np.int64)
-        reshape_shape[1] = -1
-        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
-                                             reshape_shape)
-        self.model.add_initializer(init_shape)
-
-        reshape_op_output = node.output[0] + "_reshape"
-        reshape_node = onnx.helper.make_node("Reshape", reshape_input, [reshape_op_output],
-                                             quantized_bias_name + "reshape")
-        nodes.append(reshape_node)
-
-        # Add an Add operation for bias
-        bias_add_input = [last_output]
-        bias_add_input.append(reshape_op_output)
-        add_node_output = node.output[0] + "_bias_add"
-        add_node = onnx.helper.make_node("Add", bias_add_input, [add_node_output], quantized_bias_name + "bias_add")
-        nodes.append(add_node)
-        return add_node_output
-
-    def quantize_bias_dynamic(self, bias_name, input_name, weight_name, new_node_list):
-        '''
-        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
-        '''
-
-        # get scale for weight
-        weight_scale_name = self.quantized_value_map[weight_name].scale_name
-        weight_initializer = find_by_name(weight_scale_name, self.model.initializer())
-        weight_scale = self.tensor_proto_to_array(weight_initializer)
-
-        # get bias
-        bias_initializer = find_by_name(bias_name, self.model.initializer())
-        bias_data = self.tensor_proto_to_array(bias_initializer)
-        quantized_bias_name = bias_name + "_quantized"
-
-        qType = onnx_proto.TensorProto.INT32
-
-        input_scale_name = input_name + "_scale"
-        bias_scale_node = onnx.helper.make_node("Mul", [input_scale_name, weight_scale_name], [bias_name + "_scale"],
-                                                bias_name + "_scale_node")
-        new_node_list.append(bias_scale_node)
-
-        quantize_bias_node = onnx.helper.make_node("Div", [bias_name, bias_scale_node.output[0]],
-                                                   [bias_name + "_tmp_quant:0"], bias_name + "_tmp_qaunt")
-        new_node_list.append(quantize_bias_node)
-
-        bias_rounded_node = onnx.helper.make_node("Floor", quantize_bias_node.output, [bias_name + "_quant_rounded:0"],
-                                                  bias_name + "_quant_rounded")
-        new_node_list.append(bias_rounded_node)
-
-        bias_cast_node = onnx.helper.make_node("Cast",
-                                               bias_rounded_node.output, [quantized_bias_name],
-                                               quantized_bias_name + "_node",
-                                               to=qType)
-        new_node_list.append(bias_cast_node)
-
-        return quantized_bias_name
-
     def quantize_bias_static(self, bias_name, input_name, weight_name):
         '''
         Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py
index b9793aa238..4999d61d6a 100644
--- a/onnxruntime/python/tools/quantization/operators/conv.py
+++ b/onnxruntime/python/tools/quantization/operators/conv.py
@@ -1,4 +1,5 @@
 import onnx
+import numpy as np
 from .base_operator import QuantOperatorBase
 from .qdq_base_operator import QDQOperatorBase
 from ..quant_utils import find_by_name, get_mul_node, QuantizedValue, QuantizedValueType, attribute_to_kwarg, BiasToQuantize
@@ -9,6 +10,42 @@ class ConvInteger(QuantOperatorBase):
     def __init__(self, onnx_quantizer, onnx_node):
         super().__init__(onnx_quantizer, onnx_node)
 
+    def add_bias(self, nodes, scaled_output):
+        '''
+        Handle the bias add for self.node (Conv) by adding a "Reshape" node on the bias and an "Add" node
+            parameter nodes: list that the new Reshape and Add nodes are appended to
+            parameter scaled_output: name of the output of the quantized conv without bias
+            The bias is read from self.node.input[2]; the weight (self.node.input[1]) must be an initializer
+            The Add node writes to self.node.output[0], the output name of the original Conv
+            raises ValueError if the weight is not found among the model initializers
+            return: None
+        '''
+        node = self.node
+        model = self.quantizer.model
+        # Add tensors for the shape to be reshaped to
+        weight = find_by_name(node.input[1], model.initializer())
+        if weight is None:
+            raise ValueError("Expected {} to be an initializer".format(node.input[1]))
+
+        # Add reshape for correct broadcast
+        output = node.output[0]
+        reshape_input_data = node.input[2] # bias of Conv
+        reshape_input_shape = output + "_bias_reshape_shape"
+        reshape_output = output + "_bias_reshape_output"
+
+        shape = np.ones((len(weight.dims)), dtype=np.int64)
+        shape[1] = -1
+        init_shape = onnx.helper.make_tensor(reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)],
+                                             shape)
+        model.add_initializer(init_shape)
+
+        reshape_node = onnx.helper.make_node("Reshape", [reshape_input_data, reshape_input_shape], [reshape_output])
+        nodes.append(reshape_node)
+
+        # Add an Add operation for bias
+        add_node = onnx.helper.make_node("Add", [scaled_output, reshape_output], [output], output + "_bias_add")
+        nodes.append(add_node)
+
     def quantize(self):
         node = self.node
         assert (node.op_type == "Conv")
@@ -16,14 +53,6 @@ class ConvInteger(QuantOperatorBase):
         (quantized_input_names, zero_point_names, scale_names, nodes) = \
             self.quantizer.quantize_inputs(node, [0, 1])
 
-        # quantize bias if exist
-        quantized_bias_name = ""
-        bias_present = False
-        if len(node.input) == 3:
-            quantized_bias_name = self.quantizer.quantize_bias_dynamic(node.input[2], node.input[0], node.input[1],
-                                                                       nodes)
-            bias_present = True
-
         conv_integer_output = node.output[0] + "_output_quantized"
         conv_integer_name = node.name + "_quant" if node.name != "" else ""
 
@@ -34,11 +63,6 @@ class ConvInteger(QuantOperatorBase):
                                                   [conv_integer_output], conv_integer_name, **kwargs)
         nodes.append(conv_integer_node)
 
-        # Add bias add nodes
-        if bias_present:
-            conv_integer_output = self.quantizer.get_bias_add_nodes(nodes, node, conv_integer_output,
-                                                                    quantized_bias_name)
-
         # Add cast operation to cast convInteger output to float.
         cast_op_output = conv_integer_output + "_cast_output"
         cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output],
@@ -60,10 +84,16 @@ class ConvInteger(QuantOperatorBase):
 
         scales_mul_op_output = scales_mul_node.output[0]
 
+        has_bias = len(node.input) == 3
+        scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"
+
         # Add mul operation to multiply mul_scales_op result with output of ConvInteger
         # and make the output of this node the same as output of original conv node.
         output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
-        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], node.output[0], output_scale_mul_op))
+        nodes.append(get_mul_node([cast_op_output, scales_mul_op_output], scaled_output_name, output_scale_mul_op))
+
+        if has_bias:
+            self.add_bias(nodes, scaled_output_name)
 
         self.quantizer.new_nodes += nodes
 
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
new file mode 100644
index 0000000000..a95ac9b50f
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# coding: utf-8
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+import onnx
+import onnxruntime
+import numpy as np
+from onnx import helper, TensorProto, numpy_helper
+from onnxruntime.quantization import quantize_dynamic
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
+
+
+def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
+  '''
+  Helper function to generate an initializer named input_name filled with random normal(0, 0.3) values
+  '''
+  tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
+  init = numpy_helper.from_array(tensor, input_name)
+  return init
+
+class TestONNXModel(unittest.TestCase):
+    def construct_model(self, model_path):
+        #       input
+        #      /    |
+        #     /     |
+        #  Conv(1)  |
+        #     |     |
+        #    Relu  Conv(2)
+        #     |     |
+        #     \     /
+        #       Add
+        #        |
+        #       (output)      note: Conv(1) and Conv(2) share the same bias initializer 'B'
+        initializers = []
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [4, 2, 8, 8])
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [4, 2, 8, 8])
+
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W1'))
+        initializers.append(generate_input_initializer([2, 2, 1, 1], np.float32, 'W2'))
+        initializers.append(generate_input_initializer([2], np.float32, 'B'))
+        conv_node_1 = onnx.helper.make_node('Conv', ['input', 'W1', 'B'], ['Conv1_O'], name='Conv1')
+        conv_node_2 = onnx.helper.make_node('Conv', ['input', 'W2', 'B'], ['Conv2_O'], name='Conv2')
+        relu_node = onnx.helper.make_node('Relu', ['Conv1_O'], ['Relu_O'], name='Relu')
+        add_node = onnx.helper.make_node('Add', ['Relu_O', 'Conv2_O'], ['output'], name='Add')
+        graph = helper.make_graph([conv_node_1, relu_node, conv_node_2, add_node],
+                                  'onnx_model_test', [input], [output], initializer=initializers)
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        onnx.save(model, model_path)
+
+    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
+        quantize_dynamic(model_fp32_path, model_int8_path)
+        quant_nodes = {'ConvInteger' : 2}
+        check_op_type_count(self, model_int8_path, **quant_nodes)
+        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
+
+    def test_quant_conv(self):
+        np.random.seed(1)
+        model_fp32_path = 'conv_bias.fp32.onnx'
+        model_int8_path = 'conv_bias.quant.onnx'
+        self.construct_model(model_fp32_path)
+
+        self.dynamic_quant_conv(model_fp32_path, model_int8_path)
+
+if __name__ == '__main__':
+    unittest.main()