Add option for max intermediate outputs for MinMaxCalibrater (#17029)

### Description
<!-- Describe your changes. -->
Adds the option to set max_intermediate_outputs for quantization with
the MinMaxCalibrater via extra_options, following the structure of
existing flags.


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
When running quantization with the MinMaxCalibrater on larger
datasets, one quickly runs out of memory since it keeps the intermediate
outputs for the full dataset in memory. Since merging and clearing of the
intermediate_outputs is already implemented within the Calibrater, this
simply adds an optional flag to make use of these functions during
quantization.
This commit is contained in:
Benedikt Hilmes 2023-10-05 20:43:12 +02:00 committed by GitHub
parent b6bef0f063
commit 742069a8e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 1 deletions

View file

@ -224,6 +224,7 @@ class MinMaxCalibrater(CalibraterBase):
use_external_data_format=False,
moving_average=False,
averaging_constant=0.01,
max_intermediate_outputs=None,
):
"""
:param model_path: ONNX model to calibrate. It is a model path
@ -233,6 +234,7 @@ class MinMaxCalibrater(CalibraterBase):
:param use_external_data_format: use external data format to store model which size is >= 2Gb
:param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
:param averaging_constant: constant smoothing factor to use when computing the moving average.
:param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
"""
super().__init__(
model_path,
@ -249,6 +251,7 @@ class MinMaxCalibrater(CalibraterBase):
if moving_average and (averaging_constant < 0 or averaging_constant > 1):
raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.")
self.averaging_constant = averaging_constant
self.max_intermediate_outputs = max_intermediate_outputs
def augment_graph(self):
"""
@ -302,8 +305,14 @@ class MinMaxCalibrater(CalibraterBase):
if not inputs:
break
self.intermediate_outputs.append(self.infer_session.run(None, inputs))
if (
self.max_intermediate_outputs is not None
and len(self.intermediate_outputs) == self.max_intermediate_outputs
):
self.compute_range()
self.clear_collected_data()
if len(self.intermediate_outputs) == 0:
if len(self.intermediate_outputs) == 0 and self.calibrate_tensors_range is None:
raise ValueError("No data is collected.")
t = self.compute_data()
@ -1011,6 +1020,9 @@ def create_calibrator(
symmetric = False if "symmetric" not in extra_options else extra_options["symmetric"]
moving_average = False if "moving_average" not in extra_options else extra_options["moving_average"]
averaging_constant = 0.01 if "averaging_constant" not in extra_options else extra_options["averaging_constant"]
max_intermediate_outputs = (
None if "max_intermediate_outputs" not in extra_options else extra_options["max_intermediate_outputs"]
)
calibrator = MinMaxCalibrater(
model,
op_types_to_calibrate,
@ -1019,6 +1031,7 @@ def create_calibrator(
symmetric=symmetric,
moving_average=moving_average,
averaging_constant=averaging_constant,
max_intermediate_outputs=max_intermediate_outputs,
)
elif calibrate_method == CalibrationMethod.Entropy:
# default settings for entropy algorithm

View file

@ -351,6 +351,10 @@ def quantize_static(
Default is 0.01. Constant smoothing factor to use when computing the moving average of the
minimum and maximum values. Effective only when the calibration method selected is MinMax and
when CalibMovingAverage is set to True.
CalibMaxIntermediateOutputs = Optional[int] :
Default is None. If set to an integer, at most this many intermediate outputs are collected
during calculation of the min-max range of the tensors before the range is computed and merged.
This will produce the same result as computing with None, but is more memory efficient.
SmoothQuant = True/False :
Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do
fake input channel quantization.
@ -396,6 +400,7 @@ def quantize_static(
("CalibTensorRangeSymmetric", "symmetric"),
("CalibMovingAverage", "moving_average"),
("CalibMovingAverageConstant", "averaging_constant"),
("CalibMaxIntermediateOutputs", "max_intermediate_outputs"),
]
calib_extra_options = {
key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options