From 7df8776322bc66bda9bb1bff1502fcceb8596efc Mon Sep 17 00:00:00 2001 From: duanshengliu <44742794+duanshengliu@users.noreply.github.com> Date: Thu, 29 Aug 2024 05:29:17 +0800 Subject: [PATCH] Add overflow protection for quantization bias to reduce quantization precision loss (#21645) ### Description When the scale of the bias is too small, the quantized bias may exceed the range of `int32`, leading to significant loss of precision. Therefore, before converting the quantized bias to `int32`, it needs to be clipped to the range of `int32` to reduce the loss of quantization precision. ### Motivation and Context Fixes issue https://github.com/microsoft/onnxruntime/issues/21000 --- onnxruntime/python/tools/quantization/base_quantizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/base_quantizer.py b/onnxruntime/python/tools/quantization/base_quantizer.py index d48964203c..b20af5137d 100644 --- a/onnxruntime/python/tools/quantization/base_quantizer.py +++ b/onnxruntime/python/tools/quantization/base_quantizer.py @@ -230,7 +230,9 @@ class BaseQuantizer: # TODO: This formula should be explained including why the scale is not estimated for the bias as well. bias_scale = input_scale * weight_scale * beta - quantized_data = (np.asarray(bias_data) / bias_scale).round().astype(np.int32) + quantized_data = (np.asarray(bias_data) / bias_scale).round() + quantized_data = np.clip(quantized_data, np.iinfo(np.int32).min, np.iinfo(np.int32).max) + quantized_data = quantized_data.astype(np.int32) # update bias initializer bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)