Fix bug in back-to-back quantization of matmul and conv (#5264)

* fix bug in the back-to-back quantization of matmul and conv
* fix bug in back-to-back gather
2026-07-05 04:17:53 +00:00 · 2020-09-23 08:47:20 -07:00 · 2020-09-23 08:47:20 -07:00 · 61ba5b501a
commit 61ba5b501a
parent b5a6a8e847
4 changed files with 4 additions and 4 deletions
--- a/onnxruntime/python/tools/quantization/onnx_model.py
+++ b/onnxruntime/python/tools/quantization/onnx_model.py
@@ -127,7 +127,7 @@ class ONNXModel:
                    nodes.append(node)
        return nodes
    
-     def save_model_to_file(self, output_path, use_external_data_format=False):
+    def save_model_to_file(self, output_path, use_external_data_format=False):
        '''
        Save model to external data, which is needed for model size > 2GB
        '''
--- a/onnxruntime/python/tools/quantization/operators/conv.py
+++ b/onnxruntime/python/tools/quantization/operators/conv.py
@@ -22,7 +22,7 @@ class ConvInteger(QuantOperatorBase):
            quantized_bias_name = self.quantizer.quantize_bias(node, nodes)
            bias_present = True

-        conv_integer_output = node.output[0] + "_quantized"
+        conv_integer_output = node.output[0] + "_output_quantized"
        conv_integer_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
--- a/onnxruntime/python/tools/quantization/operators/gather.py
+++ b/onnxruntime/python/tools/quantization/operators/gather.py
@@ -15,7 +15,7 @@ class GatherQuant(QuantOperatorBase):
        node = self.node
        assert (node.op_type == "Gather")
        if (not self.quantizer._is_valid_quantize_weight(node.input[0])):
-            self.quantizer.new_nodes += [node]
+            super().quantize()
            return

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
--- a/onnxruntime/python/tools/quantization/operators/matmul.py
+++ b/onnxruntime/python/tools/quantization/operators/matmul.py
@@ -18,7 +18,7 @@ class MatMulInteger(QuantOperatorBase):
        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1])

-        matmul_integer_output = node.output[0] + "_quantized"
+        matmul_integer_output = node.output[0] + "_output_quantized"
        matmul_integer_name = node.name + "_quant" if node.name != "" else ""
        matmul_integer_node = onnx.helper.make_node("MatMulInteger", quantized_input_names + zero_point_names,
                                                    [matmul_integer_output], matmul_integer_name)