From 05d6805830ddeff6c4229ade7d1ae07c97d97f9b Mon Sep 17 00:00:00 2001
From: Yufeng Li <liyufeng1987@gmail.com>
Date: Tue, 15 Feb 2022 15:37:21 -0800
Subject: [PATCH] clean up quantization of QAT model (#10549)

---
 .../python/tools/quantization/__init__.py     |  2 +-
 .../tools/quantization/onnx_quantizer.py      | 11 +++-
 .../python/tools/quantization/quantize.py     | 59 +------------------
 3 files changed, 12 insertions(+), 60 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/__init__.py b/onnxruntime/python/tools/quantization/__init__.py
index a040282efa..f823219491 100644
--- a/onnxruntime/python/tools/quantization/__init__.py
+++ b/onnxruntime/python/tools/quantization/__init__.py
@@ -1,4 +1,4 @@
-from .quantize import quantize, quantize_static, quantize_dynamic, quantize_qat
+from .quantize import quantize, quantize_static, quantize_dynamic
 from .quantize import QuantizationMode
 from .calibrate import CalibrationDataReader, CalibraterBase, MinMaxCalibrater, create_calibrator, CalibrationMethod
 from .quant_utils import QuantType, QuantFormat, write_calibration_table
diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py
index 6349bd8cd5..2b2a97971e 100644
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -170,6 +170,12 @@ class ONNXQuantizer:
         self.fuse_dynamic_quant = True
         return opset_version
 
+    def has_QDQ_nodes(self):
+        '''
+            Return True if any node yielded by self.model.nodes() is a QuantizeLinear or DequantizeLinear op.
+        '''
+        return any(node.op_type in ('QuantizeLinear', 'DequantizeLinear') for node in self.model.nodes())
+
     def remove_fake_quantized_nodes(self):
         '''
             Detect and remove the quantize/dequantizelinear node pairs(fake quantized nodes in Quantization-Aware training)
@@ -270,7 +276,10 @@ class ONNXQuantizer:
                 self.generated_value_names.add(output_name)
 
     def quantize_model(self):
-        self.remove_fake_quantized_nodes()
+        if self.has_QDQ_nodes():  # Q/DQ ops already present: warn instead of stripping them
+            logging.warning(
+                "Please check if the model is already quantized. "
+                "Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.")
 
         for node in self.model.nodes():
             # quantize subgraphes if have
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 826d53884f..75ad9a7c99 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -322,61 +322,4 @@ def quantize_dynamic(model_input: Path,
         extra_options)
 
     quantizer.quantize_model()
-    quantizer.model.save_model_to_file(model_output, use_external_data_format)
-
-
-def quantize_qat(model_input: Path,
-                 model_output: Path,
-                 op_types_to_quantize=[],
-                 per_channel=False,
-                 reduce_range=False,
-                 activation_type=QuantType.QUInt8,
-                 weight_type=QuantType.QUInt8,
-                 nodes_to_quantize=[],
-                 nodes_to_exclude=[],
-                 use_external_data_format=False):
-    '''
-        Given a quantize-aware traning onnx model, create a quantized onnx model and save it into a file
-    :param model_input: file path of model to quantize
-    :param model_output: file path of quantized model
-    :param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default
-    :param per_channel: quantize weights per channel
-    :param reduce_range: quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode
-    :param activation_type: quantization data type of activation
-    :param nodes_to_quantize:
-        List of nodes names to quantize. When this list is not None only the nodes in this list
-        are quantized.
-        example:
-        [
-            'Conv__224',
-            'Conv__252'
-        ]
-    :param nodes_to_exclude:
-        List of nodes names to exclude. The nodes in this list will be excluded from quantization
-        when it is not None.
-    :parma use_external_data_format: option used for large size (>2GB) model. Set to False by default. 
-    '''
-
-    mode = QuantizationMode.IntegerOps
-
-    #optimize the original model
-    optimized_model = optimize_model(Path(model_input))
-
-    if not op_types_to_quantize or len(op_types_to_quantize) == 0:
-        op_types_to_quantize = list(IntegerOpsRegistry.keys())
-
-    quantizer = ONNXQuantizer(
-        optimized_model,
-        per_channel,
-        reduce_range,
-        mode,
-        False,  #static
-        weight_type,
-        activation_type,
-        None,
-        nodes_to_quantize,
-        nodes_to_exclude,
-        op_types_to_quantize)
-
-    quantizer.quantize_model()
-    quantizer.model.save_model_to_file(model_output, use_external_data_format)
+    quantizer.model.save_model_to_file(model_output, use_external_data_format)
\ No newline at end of file