Quantization tool: Allow user to override calibrator's session EP (#23559)

### Description
The quantization calibrators have `execution_providers` attributes but
there is no way for a user to provide their own providers when using the
`quantize` or `quantize_static` functions. This PR adds a
`calibration_providers` parameter to allow users to specify the
execution providers to use during calibration. It is helpful when
quantizing large models which are slow to calibrate on the CPU.
- Chose `calibration_providers` as the name because the existing
docstrings already refer to a different `execution_provider` parameter
169917b1e7/onnxruntime/python/tools/quantization/quantize.py (L204)

169917b1e7/onnxruntime/python/tools/quantization/quantize.py (L415)
which are not present anywhere in the code.
- Can change the name to something else if needed like
calibrator_providers, and/or make it into a string instead of a
providers list.
This commit is contained in:
Jambay Kinley 2025-02-05 22:38:21 -08:00 committed by GitHub
parent 649ced4a60
commit d1fb58b0f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 1 deletions

View file

@ -380,7 +380,7 @@ class MinMaxCalibrater(CalibraterBase):
else:
raise ValueError(
f"Unable to guess tensor type for tensor {tensor_name!r}, "
f"running shape inference before quantization may resolve this issue."
"running shape inference before quantization may resolve this issue."
)
# Include axes in reduce_op when per_channel, always keeping axis=1
@ -1177,6 +1177,7 @@ def create_calibrator(
augmented_model_path="augmented_model.onnx",
calibrate_method=CalibrationMethod.MinMax,
use_external_data_format=False,
providers=None,
extra_options={}, # noqa: B006
):
calibrator = None
@ -1243,6 +1244,8 @@ def create_calibrator(
if calibrator:
calibrator.augment_graph()
if providers:
calibrator.execution_providers = providers
calibrator.create_inference_session()
return calibrator

View file

@ -53,6 +53,7 @@ def get_qnn_qdq_config(
weight_symmetric: bool | None = None,
keep_removable_activations: bool = False,
stride: int | None = None,
calibration_providers: list[str] | None = None,
) -> StaticQuantConfig:
"""
Returns a static quantization configuration suitable for running QDQ models on QNN EP.
@ -117,6 +118,8 @@ def get_qnn_qdq_config(
are automatically removed if activations are asymmetrically quantized. Keeping these activations
is necessary if optimizations or EP transformations will later remove
QuantizeLinear/DequantizeLinear operators from the model.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
Returns:
A StaticQuantConfig object
@ -192,6 +195,7 @@ def get_qnn_qdq_config(
op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
per_channel=per_channel,
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
calibration_providers=calibration_providers,
extra_options=extra_options,
)

View file

@ -99,6 +99,7 @@ class StaticQuantConfig(QuantConfig):
per_channel=False,
reduce_range=False,
use_external_data_format=False,
calibration_providers=None,
extra_options=None,
):
"""
@ -112,6 +113,8 @@ class StaticQuantConfig(QuantConfig):
quant_format: QuantFormat{QOperator, QDQ}.
QOperator format quantizes the model with quantized operators directly.
QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
@ -219,6 +222,7 @@ class StaticQuantConfig(QuantConfig):
self.calibration_data_reader = calibration_data_reader
self.calibrate_method = calibrate_method
self.quant_format = quant_format
self.calibration_providers = calibration_providers
self.extra_options = extra_options or {}
@ -473,6 +477,7 @@ def quantize_static(
nodes_to_exclude=None,
use_external_data_format=False,
calibrate_method=CalibrationMethod.MinMax,
calibration_providers=None,
extra_options=None,
):
"""
@ -520,6 +525,8 @@ def quantize_static(
List of nodes names to exclude. The nodes in this list will be excluded from quantization
when it is not None.
use_external_data_format: option used for large size (>2GB) model. Set to False by default.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ]
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
@ -697,6 +704,7 @@ def quantize_static(
augmented_model_path=Path(quant_tmp_dir).joinpath("augmented_model.onnx").as_posix(),
calibrate_method=calibrate_method,
use_external_data_format=use_external_data_format,
providers=calibration_providers,
extra_options=calib_extra_options,
)
@ -890,6 +898,7 @@ def quantize(
per_channel=quant_config.per_channel,
reduce_range=quant_config.reduce_range,
use_external_data_format=quant_config.use_external_data_format,
calibration_providers=quant_config.calibration_providers,
extra_options=quant_config.extra_options,
)