diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 26e74a6dfb..35d9626769 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -224,6 +224,7 @@ class MinMaxCalibrater(CalibraterBase): use_external_data_format=False, moving_average=False, averaging_constant=0.01, + max_intermediate_outputs=None, ): """ :param model_path: ONNX model to calibrate. It is a model path @@ -233,6 +234,7 @@ class MinMaxCalibrater(CalibraterBase): :param use_external_data_format: use external data format to store model which size is >= 2Gb :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum. :param averaging_constant: constant smoothing factor to use when computing the moving average. + :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed. """ super().__init__( model_path, @@ -249,6 +251,7 @@ class MinMaxCalibrater(CalibraterBase): if moving_average and (averaging_constant < 0 or averaging_constant > 1): raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.") self.averaging_constant = averaging_constant + self.max_intermediate_outputs = max_intermediate_outputs def augment_graph(self): """ @@ -302,8 +305,14 @@ class MinMaxCalibrater(CalibraterBase): if not inputs: break self.intermediate_outputs.append(self.infer_session.run(None, inputs)) + if ( + self.max_intermediate_outputs is not None + and len(self.intermediate_outputs) == self.max_intermediate_outputs + ): + self.compute_range() + self.clear_collected_data() - if len(self.intermediate_outputs) == 0: + if len(self.intermediate_outputs) == 0 and self.calibrate_tensors_range is None: raise ValueError("No data is collected.") t = self.compute_data() @@ -1011,6 +1020,9 @@ def create_calibrator( symmetric = False if "symmetric" not in 
extra_options else extra_options["symmetric"] moving_average = False if "moving_average" not in extra_options else extra_options["moving_average"] averaging_constant = 0.01 if "averaging_constant" not in extra_options else extra_options["averaging_constant"] + max_intermediate_outputs = ( + None if "max_intermediate_outputs" not in extra_options else extra_options["max_intermediate_outputs"] + ) calibrator = MinMaxCalibrater( model, op_types_to_calibrate, @@ -1019,6 +1031,7 @@ def create_calibrator( symmetric=symmetric, moving_average=moving_average, averaging_constant=averaging_constant, + max_intermediate_outputs=max_intermediate_outputs, ) elif calibrate_method == CalibrationMethod.Entropy: # default settings for entropy algorithm diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 706047fe32..0fdd64fdd3 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -351,6 +351,10 @@ def quantize_static( Default is 0.01. Constant smoothing factor to use when computing the moving average of the minimum and maximum values. Effective only when the calibration method selected is MinMax and when CalibMovingAverage is set to True. + CalibMaxIntermediateOutputs = Optional[int] : + Default is None. If set to an integer, the min-max calibration collects at most that many + intermediate outputs before computing their range and merging it into the overall range. This + produces the same result as leaving it set to None, but is more memory efficient. SmoothQuant = True/False : Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do fake input channel quantization. 
@@ -396,6 +400,7 @@ def quantize_static( ("CalibTensorRangeSymmetric", "symmetric"), ("CalibMovingAverage", "moving_average"), ("CalibMovingAverageConstant", "averaging_constant"), + ("CalibMaxIntermediateOutputs", "max_intermediate_outputs"), ] calib_extra_options = { key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options