Quantization tool: Allow user to override calibrator's session EP (#23559)

### Description
The quantization calibrators have `execution_providers` attributes but
there is no way for a user to provide their own providers when using the
`quantize` or `quantize_static` functions. This PR adds a
`calibration_providers` parameter to allow users to specify the
execution providers to use during calibration. It is helpful when
quantizing large models which are slow to calibrate on the CPU.
- Chose `calibration_providers` as the name because the existing
docstrings already refer to a different `execution_provider` parameter
169917b1e7/onnxruntime/python/tools/quantization/quantize.py (L204)

169917b1e7/onnxruntime/python/tools/quantization/quantize.py (L415)
which are not present anywhere in the code.
- Can change the name to something else if needed like
calibrator_providers, and/or make it into a string instead of a
providers list.
This commit is contained in:
Jambay Kinley 2025-02-05 22:38:21 -08:00 committed by GitHub
parent 649ced4a60
commit d1fb58b0f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 1 deletions

View file

@ -380,7 +380,7 @@ class MinMaxCalibrater(CalibraterBase):
else:
raise ValueError(
f"Unable to guess tensor type for tensor {tensor_name!r}, "
f"running shape inference before quantization may resolve this issue."
"running shape inference before quantization may resolve this issue."
)
# Include axes in reduce_op when per_channel, always keeping axis=1
@ -1177,6 +1177,7 @@ def create_calibrator(
augmented_model_path="augmented_model.onnx",
calibrate_method=CalibrationMethod.MinMax,
use_external_data_format=False,
providers=None,
extra_options={}, # noqa: B006
):
calibrator = None
@ -1243,6 +1244,8 @@ def create_calibrator(
if calibrator:
calibrator.augment_graph()
if providers:
calibrator.execution_providers = providers
calibrator.create_inference_session()
return calibrator

View file

@ -53,6 +53,7 @@ def get_qnn_qdq_config(
weight_symmetric: bool | None = None,
keep_removable_activations: bool = False,
stride: int | None = None,
calibration_providers: list[str] | None = None,
) -> StaticQuantConfig:
"""
Returns a static quantization configuration suitable for running QDQ models on QNN EP.
@ -117,6 +118,8 @@ def get_qnn_qdq_config(
are automatically removed if activations are asymmetrically quantized. Keeping these activations
is necessary if optimizations or EP transformations will later remove
QuantizeLinear/DequantizeLinear operators from the model.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
Returns:
A StaticQuantConfig object
@ -192,6 +195,7 @@ def get_qnn_qdq_config(
op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
per_channel=per_channel,
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
calibration_providers=calibration_providers,
extra_options=extra_options,
)

View file

@ -99,6 +99,7 @@ class StaticQuantConfig(QuantConfig):
per_channel=False,
reduce_range=False,
use_external_data_format=False,
calibration_providers=None,
extra_options=None,
):
"""
@ -112,6 +113,8 @@ class StaticQuantConfig(QuantConfig):
quant_format: QuantFormat{QOperator, QDQ}.
QOperator format quantizes the model with quantized operators directly.
QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
@ -219,6 +222,7 @@ class StaticQuantConfig(QuantConfig):
self.calibration_data_reader = calibration_data_reader
self.calibrate_method = calibrate_method
self.quant_format = quant_format
self.calibration_providers = calibration_providers
self.extra_options = extra_options or {}
@ -473,6 +477,7 @@ def quantize_static(
nodes_to_exclude=None,
use_external_data_format=False,
calibrate_method=CalibrationMethod.MinMax,
calibration_providers=None,
extra_options=None,
):
"""
@ -520,6 +525,8 @@ def quantize_static(
List of nodes names to exclude. The nodes in this list will be excluded from quantization
when it is not None.
use_external_data_format: option used for large size (>2GB) model. Set to False by default.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ]
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
@ -697,6 +704,7 @@ def quantize_static(
augmented_model_path=Path(quant_tmp_dir).joinpath("augmented_model.onnx").as_posix(),
calibrate_method=calibrate_method,
use_external_data_format=use_external_data_format,
providers=calibration_providers,
extra_options=calib_extra_options,
)
@ -890,6 +898,7 @@ def quantize(
per_channel=quant_config.per_channel,
reduce_range=quant_config.reduce_range,
use_external_data_format=quant_config.use_external_data_format,
calibration_providers=quant_config.calibration_providers,
extra_options=quant_config.extra_options,
)