Add option for max intermediate outputs for MinMaxCalibrater (#17029)

### Description  Adds the option to set max_intermediate_outputs for quantization with the MinMaxCalibrater via extra_options, following the structure of the existing flags. ### Motivation and Context  When running quantization with the MinMaxCalibrater on larger datasets, one quickly runs out of memory because it tries to load the full dataset. Since merging and clearing of the intermediate_outputs are already implemented within the Calibrater, this simply adds an optional flag to make use of these functions during quantization.
2026-07-16 18:31:27 +00:00 · 2023-10-05 20:43:12 +02:00 · 2023-10-05 20:43:12 +02:00 · 742069a8e8
commit 742069a8e8
parent b6bef0f063
2 changed files with 19 additions and 1 deletions
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@ -224,6 +224,7 @@ class MinMaxCalibrater(CalibraterBase):
        use_external_data_format=False,
        moving_average=False,
        averaging_constant=0.01,
+        max_intermediate_outputs=None,
    ):
        """
        :param model_path: ONNX model to calibrate. It is a model path
@ -233,6 +234,7 @@ class MinMaxCalibrater(CalibraterBase):
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
+        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        """
        super().__init__(
            model_path,
@ -249,6 +251,7 @@ class MinMaxCalibrater(CalibraterBase):
        if moving_average and (averaging_constant < 0 or averaging_constant > 1):
            raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.")
        self.averaging_constant = averaging_constant
+        self.max_intermediate_outputs = max_intermediate_outputs

    def augment_graph(self):
        """
@ -302,8 +305,14 @@ class MinMaxCalibrater(CalibraterBase):
            if not inputs:
                break
            self.intermediate_outputs.append(self.infer_session.run(None, inputs))
+            if (
+                self.max_intermediate_outputs is not None
+                and len(self.intermediate_outputs) == self.max_intermediate_outputs
+            ):
+                self.compute_range()
+                self.clear_collected_data()

-        if len(self.intermediate_outputs) == 0:
+        if len(self.intermediate_outputs) == 0 and self.calibrate_tensors_range is None:
            raise ValueError("No data is collected.")

        t = self.compute_data()
@ -1011,6 +1020,9 @@ def create_calibrator(
        symmetric = False if "symmetric" not in extra_options else extra_options["symmetric"]
        moving_average = False if "moving_average" not in extra_options else extra_options["moving_average"]
        averaging_constant = 0.01 if "averaging_constant" not in extra_options else extra_options["averaging_constant"]
+        max_intermediate_outputs = (
+            None if "max_intermediate_outputs" not in extra_options else extra_options["max_intermediate_outputs"]
+        )
        calibrator = MinMaxCalibrater(
            model,
            op_types_to_calibrate,
@ -1019,6 +1031,7 @@ def create_calibrator(
            symmetric=symmetric,
            moving_average=moving_average,
            averaging_constant=averaging_constant,
+            max_intermediate_outputs=max_intermediate_outputs,
        )
    elif calibrate_method == CalibrationMethod.Entropy:
        # default settings for entropy algorithm
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@ -351,6 +351,10 @@ def quantize_static(
                    Default is 0.01. Constant smoothing factor to use when computing the moving average of the
                    minimum and maximum values. Effective only when the calibration method selected is MinMax and
                    when CalibMovingAverage is set to True.
+                CalibMaxIntermediateOutputs = Optional[int] :
+                    Default is None. If set to an integer, during calculation of the min-max range of the tensors
+                    it will load at max value number of outputs before computing and merging the range. This will
+                    produce the same result as all computing with None, but is more memory efficient.
                SmoothQuant = True/False :
                    Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do
                    fake input channel quantization.
@ -396,6 +400,7 @@ def quantize_static(
        ("CalibTensorRangeSymmetric", "symmetric"),
        ("CalibMovingAverage", "moving_average"),
        ("CalibMovingAverageConstant", "averaging_constant"),
+        ("CalibMaxIntermediateOutputs", "max_intermediate_outputs"),
    ]
    calib_extra_options = {
        key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options