mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-30 23:18:20 +00:00
Add option for max intermediate outputs for MinMaxCalibrater (#17029)
### Description <!-- Describe your changes. --> Adds the option to set max_intermediate_outputs for quantization with the MinMaxCalibrater via extra_options, following the structure of existing flags. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> When running quantization with the MinMaxCalibrater on larger datasets, one quickly runs out of memory because it tries to load the full dataset. Since merging and clearing of the intermediate_outputs is already implemented within the Calibrater, this simply adds an optional flag to make use of these functions during quantization.
This commit is contained in:
parent
b6bef0f063
commit
742069a8e8
2 changed files with 19 additions and 1 deletions
|
|
@ -224,6 +224,7 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
use_external_data_format=False,
|
||||
moving_average=False,
|
||||
averaging_constant=0.01,
|
||||
max_intermediate_outputs=None,
|
||||
):
|
||||
"""
|
||||
:param model_path: ONNX model to calibrate. It is a model path
|
||||
|
|
@ -233,6 +234,7 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
:param use_external_data_format: use external data format to store model which size is >= 2Gb
|
||||
:param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
|
||||
:param averaging_constant: constant smoothing factor to use when computing the moving average.
|
||||
:param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
|
||||
"""
|
||||
super().__init__(
|
||||
model_path,
|
||||
|
|
@ -249,6 +251,7 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
if moving_average and (averaging_constant < 0 or averaging_constant > 1):
|
||||
raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.")
|
||||
self.averaging_constant = averaging_constant
|
||||
self.max_intermediate_outputs = max_intermediate_outputs
|
||||
|
||||
def augment_graph(self):
|
||||
"""
|
||||
|
|
@ -302,8 +305,14 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
if not inputs:
|
||||
break
|
||||
self.intermediate_outputs.append(self.infer_session.run(None, inputs))
|
||||
if (
|
||||
self.max_intermediate_outputs is not None
|
||||
and len(self.intermediate_outputs) == self.max_intermediate_outputs
|
||||
):
|
||||
self.compute_range()
|
||||
self.clear_collected_data()
|
||||
|
||||
if len(self.intermediate_outputs) == 0:
|
||||
if len(self.intermediate_outputs) == 0 and self.calibrate_tensors_range is None:
|
||||
raise ValueError("No data is collected.")
|
||||
|
||||
t = self.compute_data()
|
||||
|
|
@ -1011,6 +1020,9 @@ def create_calibrator(
|
|||
symmetric = False if "symmetric" not in extra_options else extra_options["symmetric"]
|
||||
moving_average = False if "moving_average" not in extra_options else extra_options["moving_average"]
|
||||
averaging_constant = 0.01 if "averaging_constant" not in extra_options else extra_options["averaging_constant"]
|
||||
max_intermediate_outputs = (
|
||||
None if "max_intermediate_outputs" not in extra_options else extra_options["max_intermediate_outputs"]
|
||||
)
|
||||
calibrator = MinMaxCalibrater(
|
||||
model,
|
||||
op_types_to_calibrate,
|
||||
|
|
@ -1019,6 +1031,7 @@ def create_calibrator(
|
|||
symmetric=symmetric,
|
||||
moving_average=moving_average,
|
||||
averaging_constant=averaging_constant,
|
||||
max_intermediate_outputs=max_intermediate_outputs,
|
||||
)
|
||||
elif calibrate_method == CalibrationMethod.Entropy:
|
||||
# default settings for entropy algorithm
|
||||
|
|
|
|||
|
|
@ -351,6 +351,10 @@ def quantize_static(
|
|||
Default is 0.01. Constant smoothing factor to use when computing the moving average of the
|
||||
minimum and maximum values. Effective only when the calibration method selected is MinMax and
|
||||
when CalibMovingAverage is set to True.
|
||||
CalibMaxIntermediateOutputs = Optional[int] :
|
||||
Default is None. If set to an integer, during calculation of the min-max range of the tensors
|
||||
it will load at max value number of outputs before computing and merging the range. This will
|
||||
produce the same result as all computing with None, but is more memory efficient.
|
||||
SmoothQuant = True/False :
|
||||
Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do
|
||||
fake input channel quantization.
|
||||
|
|
@ -396,6 +400,7 @@ def quantize_static(
|
|||
("CalibTensorRangeSymmetric", "symmetric"),
|
||||
("CalibMovingAverage", "moving_average"),
|
||||
("CalibMovingAverageConstant", "averaging_constant"),
|
||||
("CalibMaxIntermediateOutputs", "max_intermediate_outputs"),
|
||||
]
|
||||
calib_extra_options = {
|
||||
key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options
|
||||
|
|
|
|||
Loading…
Reference in a new issue