mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Quantization tool: Allow user to override calibrator's session EP (#23559)
### Description The quantization calibrators have `execution_providers` attributes but there is no way for a user to provide their own providers when using the `quantize` or `quantize_static` functions. This PR adds a `calibration_providers` parameter to allow users to specify the execution providers to use during calibration. It is helpful when quantizing large models which are slow to calibrate on the CPU. - Chose `calibration_providers` as the name because the existing docstrings refer to an `execution_provider` parameter (see `onnxruntime/python/tools/quantization/quantize.py` at commit 169917b1e7, lines 204 and 415) that is not present anywhere in the code. - Can change the name to something else if needed, such as `calibrator_providers`, and/or make it a string instead of a providers list.
This commit is contained in:
parent
649ced4a60
commit
d1fb58b0f2
3 changed files with 17 additions and 1 deletions
|
|
@ -380,7 +380,7 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
else:
|
||||
raise ValueError(
|
||||
f"Unable to guess tensor type for tensor {tensor_name!r}, "
|
||||
f"running shape inference before quantization may resolve this issue."
|
||||
"running shape inference before quantization may resolve this issue."
|
||||
)
|
||||
|
||||
# Include axes in reduce_op when per_channel, always keeping axis=1
|
||||
|
|
@ -1177,6 +1177,7 @@ def create_calibrator(
|
|||
augmented_model_path="augmented_model.onnx",
|
||||
calibrate_method=CalibrationMethod.MinMax,
|
||||
use_external_data_format=False,
|
||||
providers=None,
|
||||
extra_options={}, # noqa: B006
|
||||
):
|
||||
calibrator = None
|
||||
|
|
@ -1243,6 +1244,8 @@ def create_calibrator(
|
|||
|
||||
if calibrator:
|
||||
calibrator.augment_graph()
|
||||
if providers:
|
||||
calibrator.execution_providers = providers
|
||||
calibrator.create_inference_session()
|
||||
return calibrator
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ def get_qnn_qdq_config(
|
|||
weight_symmetric: bool | None = None,
|
||||
keep_removable_activations: bool = False,
|
||||
stride: int | None = None,
|
||||
calibration_providers: list[str] | None = None,
|
||||
) -> StaticQuantConfig:
|
||||
"""
|
||||
Returns a static quantization configuration suitable for running QDQ models on QNN EP.
|
||||
|
|
@ -117,6 +118,8 @@ def get_qnn_qdq_config(
|
|||
are automatically removed if activations are asymmetrically quantized. Keeping these activations
|
||||
is necessary if optimizations or EP transformations will later remove
|
||||
QuantizeLinear/DequantizeLinear operators from the model.
|
||||
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
|
||||
[ "CPUExecutionProvider" ].
|
||||
|
||||
Returns:
|
||||
A StaticQuantConfig object
|
||||
|
|
@ -192,6 +195,7 @@ def get_qnn_qdq_config(
|
|||
op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
|
||||
per_channel=per_channel,
|
||||
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
|
||||
calibration_providers=calibration_providers,
|
||||
extra_options=extra_options,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ class StaticQuantConfig(QuantConfig):
|
|||
per_channel=False,
|
||||
reduce_range=False,
|
||||
use_external_data_format=False,
|
||||
calibration_providers=None,
|
||||
extra_options=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -112,6 +113,8 @@ class StaticQuantConfig(QuantConfig):
|
|||
quant_format: QuantFormat{QOperator, QDQ}.
|
||||
QOperator format quantizes the model with quantized operators directly.
|
||||
QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor.
|
||||
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
|
||||
[ "CPUExecutionProvider" ].
|
||||
extra_options:
|
||||
key value pair dictionary for various options in different case. Current used:
|
||||
extra.Sigmoid.nnapi = True/False (Default is False)
|
||||
|
|
@ -219,6 +222,7 @@ class StaticQuantConfig(QuantConfig):
|
|||
self.calibration_data_reader = calibration_data_reader
|
||||
self.calibrate_method = calibrate_method
|
||||
self.quant_format = quant_format
|
||||
self.calibration_providers = calibration_providers
|
||||
self.extra_options = extra_options or {}
|
||||
|
||||
|
||||
|
|
@ -473,6 +477,7 @@ def quantize_static(
|
|||
nodes_to_exclude=None,
|
||||
use_external_data_format=False,
|
||||
calibrate_method=CalibrationMethod.MinMax,
|
||||
calibration_providers=None,
|
||||
extra_options=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -520,6 +525,8 @@ def quantize_static(
|
|||
List of nodes names to exclude. The nodes in this list will be excluded from quantization
|
||||
when it is not None.
|
||||
use_external_data_format: option used for large size (>2GB) model. Set to False by default.
|
||||
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
|
||||
[ "CPUExecutionProvider" ]
|
||||
extra_options:
|
||||
key value pair dictionary for various options in different case. Current used:
|
||||
extra.Sigmoid.nnapi = True/False (Default is False)
|
||||
|
|
@ -697,6 +704,7 @@ def quantize_static(
|
|||
augmented_model_path=Path(quant_tmp_dir).joinpath("augmented_model.onnx").as_posix(),
|
||||
calibrate_method=calibrate_method,
|
||||
use_external_data_format=use_external_data_format,
|
||||
providers=calibration_providers,
|
||||
extra_options=calib_extra_options,
|
||||
)
|
||||
|
||||
|
|
@ -890,6 +898,7 @@ def quantize(
|
|||
per_channel=quant_config.per_channel,
|
||||
reduce_range=quant_config.reduce_range,
|
||||
use_external_data_format=quant_config.use_external_data_format,
|
||||
calibration_providers=quant_config.calibration_providers,
|
||||
extra_options=quant_config.extra_options,
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue