mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Enable ability to control whether or not to quantize the bias (#14549)
This commit is contained in:
parent
7eca42484c
commit
a5eb616819
2 changed files with 14 additions and 1 deletion
|
|
@ -97,6 +97,13 @@ class QDQQuantizer(ONNXQuantizer):
|
|||
False if "AddQDQPairToWeight" not in extra_options else extra_options["AddQDQPairToWeight"]
|
||||
)
|
||||
|
||||
# Some scenarios do not need the bias quantized. For example, in the case of Quantization Aware Training,
|
||||
# quantizing the bias is not needed. This is because in QAT, all model parameters are expected to be in
|
||||
# floating point format. To that end, we can use the FakeQuant operator for weights and activations that
|
||||
# can always have QDQ pairs (by using AddQDQPairToWeight). But for biases in a quantized model, we can't use
|
||||
# FakeQuant because it only ever appears before a DQ (since it is quantized as int32).
|
||||
self.quantize_bias = True if "QuantizeBias" not in extra_options else extra_options["QuantizeBias"]
|
||||
|
||||
# The default behavior is that multiple nodes can share a QDQ pair as their inputs.
|
||||
# In TRT, QDQ pair can’t be shared between nodes, so it will create dedicated QDQ pairs for each node.
|
||||
self.dedicated_qdq_pair = (
|
||||
|
|
@ -211,7 +218,8 @@ class QDQQuantizer(ONNXQuantizer):
|
|||
|
||||
self._quantize_normal_tensors()
|
||||
self._quantize_sharing_param_tensors()
|
||||
self._quantize_bias_tensors()
|
||||
if self.quantize_bias:
|
||||
self._quantize_bias_tensors()
|
||||
self.remove_nodes()
|
||||
if not self.add_qdq_pair_to_weight:
|
||||
self.model.clean_initializers()
|
||||
|
|
|
|||
|
|
@ -139,6 +139,11 @@ class StaticQuantConfig(QuantConfig):
|
|||
Default is 0.01. Constant smoothing factor to use when computing the moving average of the
|
||||
minimum and maximum values. Effective only when the calibration method selected is MinMax and
|
||||
when CalibMovingAverage is set to True.
|
||||
QuantizeBias = True/False :
|
||||
Default is True, which quantizes floating-point biases and inserts only
|
||||
a DeQuantizeLinear node. If False, the bias remains in floating point and no
|
||||
quantization nodes associated with biases are inserted.
|
||||
This extra option is only effective when quant_format is QuantFormat.QDQ.
|
||||
execution_provider : An enum that indicates the Execution Provider, such as: CPU, TRT, NNAPI, SNE, etc.
|
||||
Raises:
|
||||
ValueError: Raise ValueError if execution provider is unknown
|
||||
|
|
|
|||
Loading…
Reference in a new issue