From fde847473b91c7feeaefc1ee641306a8147fcf0b Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Fri, 4 Mar 2022 23:55:31 +0100
Subject: [PATCH] Add min max moving average calibration method (#10753)

* Add min max moving average calibration method

* Modify the calibration extra options dictionary creation
---
 .../python/tools/quantization/calibrate.py    | 37 ++++++++++++++++---
 .../python/tools/quantization/quantize.py     | 14 ++++++-
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py
index 7a9e9d9197..14761a73bb 100644
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@@ -140,19 +140,32 @@ class CalibraterBase:
 
 
 class MinMaxCalibrater(CalibraterBase):
-    def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False, use_external_data_format=False):
+    def __init__(self, 
+                model,
+                op_types_to_calibrate=[],
+                augmented_model_path='augmented_model.onnx',
+                symmetric=False,
+                use_external_data_format=False,
+                moving_average=False,
+                averaging_constant=0.01):
         '''
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
         :param symmetric: make range of tensor symmetric (central point is 0).
         :param use_external_data_format: use external data format to store model which size is >= 2Gb
+        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
+        :param averaging_constant: constant smoothing factor to use when computing the moving average.
         '''
         super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, symmetric, use_external_data_format)
         self.intermediate_outputs = []
         self.calibrate_tensors_range = None
         self.num_model_outputs = len(self.model.graph.output)
         self.model_original_outputs = set(output.name for output in self.model.graph.output)
+        self.moving_average = moving_average
+        if moving_average and (averaging_constant < 0 or averaging_constant > 1):
+            raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.")
+        self.averaging_constant = averaging_constant
 
     def augment_graph(self):
         '''
@@ -222,8 +235,12 @@ class MinMaxCalibrater(CalibraterBase):
             return new_range
 
         for key, value in old_range.items(): 
-            min_value = min(value[0], new_range[key][0])
-            max_value = max(value[1], new_range[key][1])
+            if self.moving_average:
+                min_value = value[0] + self.averaging_constant * (new_range[key][0] - value[0])
+                max_value = value[1] + self.averaging_constant * (new_range[key][1] - value[1])
+            else:
+                min_value = min(value[0], new_range[key][0])
+                max_value = max(value[1], new_range[key][1])
             new_range[key] = (min_value, max_value)
 
         return new_range
@@ -258,8 +275,12 @@ class MinMaxCalibrater(CalibraterBase):
         for i in range(0, len(added_output_names), 2):
             min_value = 0
             max_value = 0
-            min_value_array = min(merged_added_output_dict[added_output_names[i]])
-            max_value_array = max(merged_added_output_dict[added_output_names[i + 1]])
+            if self.moving_average:
+                min_value_array = np.mean(merged_added_output_dict[added_output_names[i]], axis = 0)
+                max_value_array = np.mean(merged_added_output_dict[added_output_names[i + 1]], axis = 0)
+            else:
+                min_value_array = min(merged_added_output_dict[added_output_names[i]])
+                max_value_array = max(merged_added_output_dict[added_output_names[i + 1]])
             if type(min_value_array) == int or min_value_array.size > 0:
                 min_value = float(min_value_array)
             if type(max_value_array) == int or max_value_array.size > 0:
@@ -716,10 +737,14 @@ def create_calibrator(model,
     if calibrate_method == CalibrationMethod.MinMax:
         # default settings for min-max algorithm
         symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
+        moving_average = False if 'moving_average' not in extra_options else extra_options['moving_average']
+        averaging_constant = 0.01 if 'averaging_constant' not in extra_options else extra_options['averaging_constant']
         return MinMaxCalibrater(
             model, op_types_to_calibrate, augmented_model_path,
             use_external_data_format=use_external_data_format,
-            symmetric=symmetric
+            symmetric=symmetric,
+            moving_average=moving_average,
+            averaging_constant=averaging_constant
         )
     elif calibrate_method == CalibrationMethod.Entropy:
         # default settings for entropy algorithm
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 9be2774d4b..75962b6402 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -226,6 +226,11 @@ def quantize_static(model_input,
                                                             If specific op type supports per channel quantization but not explicitly specified with channel axis,
                                                             default channel axis will be used.
             CalibTensorRangeSymmetric = True/False : Default is False. If enabled, the final range of tensor during calibration will be explicitly set to symmetric to central point "0".
+            CalibMovingAverage = True/False : Default is False. If enabled, the moving average of the minimum and maximum values
+                                              will be computed when the calibration method selected is MinMax.
+            CalibMovingAverageConstant = float : Default is 0.01. Constant smoothing factor to use when computing the moving average of
+                                                 the minimum and maximum values. Must be in the range [0, 1]. Effective only when the
+                                                 calibration method selected is MinMax and when CalibMovingAverage is set to True.
     '''
 
     mode = QuantizationMode.QLinearOps
@@ -235,7 +240,12 @@ def quantize_static(model_input,
 
     model = load_model(Path(model_input), optimize_model, False)
 
-    calib_extra_options = {} if 'CalibTensorRangeSymmetric' not in extra_options else {'symmetric': extra_options['CalibTensorRangeSymmetric']} 
+    calib_extra_options_keys = [
+        ('CalibTensorRangeSymmetric', 'symmetric'),
+        ('CalibMovingAverage', 'moving_average'),
+        ('CalibMovingAverageConstant', 'averaging_constant')
+    ]
+    calib_extra_options = {key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options}
     calibrator = create_calibrator(
         model,
         op_types_to_quantize,
@@ -355,4 +365,4 @@ def quantize_dynamic(model_input: Path,
         extra_options)
 
     quantizer.quantize_model()
-    quantizer.model.save_model_to_file(model_output, use_external_data_format)
\ No newline at end of file
+    quantizer.model.save_model_to_file(model_output, use_external_data_format)