mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
[Quant tool] Do not default to contrib Q/DQ ops for 16-bit (#20376)
### Description
Updates the QDQ quantizer to use ONNX Q/DQ ops for 16-bit quantization if opset >= 21.

### Motivation and Context
The QDQ quantizer previously set the 'com.microsoft' domain on inserted Q/DQ ops when the model needed 16-bit support. ONNX 1.16.0 added int16/uint16 support to the QuantizeLinear and DequantizeLinear operators, so we can change the default behavior.
This commit is contained in:
parent
a8f74e3ec7
commit
e4c0cb2b9a
3 changed files with 65 additions and 24 deletions
|
|
@ -165,10 +165,13 @@ def get_qnn_qdq_config(
|
|||
"WeightSymmetric": weight_symmetric,
|
||||
}
|
||||
|
||||
# TODO: Remove this extra option once ORT uses an ONNX version that supports 16-bit Q/DQ ops.
|
||||
overrides_have_int16 = any(t in Q16_TYPES for t in overrides_helper.get_quant_types())
|
||||
if activation_type in Q16_TYPES or weight_type in Q16_TYPES or overrides_have_int16:
|
||||
extra_options["UseQDQContribOps"] = True
|
||||
# ONNX opset < 21 does not support 16-bit quantization, so must use 'com.microsoft' domain
|
||||
# on Q/DQ operators if using 16-bit quantization.
|
||||
onnx_opset = next(x for x in model.opset_import if x.domain == "" or x.domain == "ai.onnx")
|
||||
if onnx_opset.version < 21:
|
||||
overrides_have_int16 = any(t in Q16_TYPES for t in overrides_helper.get_quant_types())
|
||||
if activation_type in Q16_TYPES or weight_type in Q16_TYPES or overrides_have_int16:
|
||||
extra_options["UseQDQContribOps"] = True
|
||||
|
||||
return StaticQuantConfig(
|
||||
calibration_data_reader,
|
||||
|
|
|
|||
|
|
@ -187,20 +187,22 @@ class QDQQuantizer(BaseQuantizer):
|
|||
|
||||
self.qdq_op_domain = ms_domain if extra_options.get("UseQDQContribOps", False) else None
|
||||
|
||||
# The ONNX spec does not yet support 16-bit Q/DQ ops. So, must override the Q/DQ op domain to 'com.microsoft'
|
||||
# if the activation or weight types are 16-bit integers.
|
||||
# TODO: Remove this override (and use only the 'UseQDQContribOps' option) if/when ONNX adds 16-bit support.
|
||||
int16_types = (TensorProto.UINT16, TensorProto.INT16)
|
||||
overrides_have_int16 = any(t.tensor_type in int16_types for t in self.tensor_quant_override_qtypes)
|
||||
if not self.qdq_op_domain and (
|
||||
self.activation_qType in int16_types or self.weight_qType in int16_types or overrides_have_int16
|
||||
):
|
||||
logging.warning(
|
||||
"ONNX QuantizeLinear and DequantizeLinear operators do not support 16-bit integer quantization types. "
|
||||
f"The domain of QuantizeLinear and DequantizeLinear operators will be set to '{ms_domain}' to "
|
||||
"enable support."
|
||||
)
|
||||
self.qdq_op_domain = ms_domain
|
||||
# The ONNX spec did not support 16-bit Q/DQ ops before opset 21.
|
||||
# So, may have to override the Q/DQ op domain to 'com.microsoft' if the activation or weight types
|
||||
# are 16-bit integers.
|
||||
if self.opset_version < 21:
|
||||
int16_types = (TensorProto.UINT16, TensorProto.INT16)
|
||||
overrides_have_int16 = any(t.tensor_type in int16_types for t in self.tensor_quant_override_qtypes)
|
||||
if not self.qdq_op_domain and (
|
||||
self.activation_qType in int16_types or self.weight_qType in int16_types or overrides_have_int16
|
||||
):
|
||||
logging.warning(
|
||||
"ONNX QuantizeLinear and DequantizeLinear operators do not support "
|
||||
"16-bit integer quantization types prior to opset 21. "
|
||||
f"The domain of QuantizeLinear and DequantizeLinear operators will be set to '{ms_domain}' to "
|
||||
"enable support."
|
||||
)
|
||||
self.qdq_op_domain = ms_domain
|
||||
|
||||
self.quantization_params = self.calc_graph_quant_params()
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
|
|||
"OUT": (0, np.float32(0.005075461231172085)),
|
||||
}
|
||||
|
||||
def build_float32_model(self):
|
||||
def build_float32_model(self, opset=13):
|
||||
# (input)
|
||||
# |
|
||||
# Sigmoid
|
||||
|
|
@ -72,11 +72,13 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
|
|||
graph = onnx.helper.make_graph(
|
||||
[sigmoid_node, conv_node], "test", [inp], [out], initializer=[wgt_init, bias_init]
|
||||
)
|
||||
model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 13)])
|
||||
model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", opset)])
|
||||
onnx.save(model, "model.onnx")
|
||||
|
||||
def perform_qdq_quantization(self, output_model_name, extra_options=None, per_channel=False, activation_type=None):
|
||||
self.build_float32_model()
|
||||
def perform_qdq_quantization(
|
||||
self, output_model_name, extra_options=None, per_channel=False, activation_type=None, opset=13
|
||||
):
|
||||
self.build_float32_model(opset)
|
||||
|
||||
if activation_type is None:
|
||||
activation_type = self.default_act_qtype
|
||||
|
|
@ -428,8 +430,9 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
|
|||
|
||||
def test_16bit_overrides_set_ms_domain(self):
|
||||
"""
|
||||
Test that overriding a tensor to 16bit (when default is 8bit) automatically sets the 'com.microsoft'
|
||||
domain on DQ and Q ops.
|
||||
Test that overriding a tensor to 16bit (when default is 8bit) automatically
|
||||
sets the 'com.microsoft' domain on DQ and Q ops for opset < 21.
|
||||
Before ONNX 1.16.0, we had to use the 'com.microsoft' domain to be able to use 16-bit quantization.
|
||||
"""
|
||||
qdq_model_name = "model_quant_overrides_to_16bit.onnx"
|
||||
inp_zp, _, sig_out_zp, _, _, _, _, _, out_zp, _ = self.perform_qdq_quantization(
|
||||
|
|
@ -441,6 +444,7 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
|
|||
"SIG_OUT": [{"quant_type": QuantType.QUInt16}],
|
||||
}
|
||||
},
|
||||
opset=19,
|
||||
)
|
||||
|
||||
# Input and Sigmoid's output should be overridden to 16bit
|
||||
|
|
@ -456,6 +460,38 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
|
|||
if node.op_type in {"QuantizeLinear", "DequantizeLinear"}:
|
||||
self.assertEqual(node.domain, ms_domain)
|
||||
|
||||
def test_16bit_overrides_not_set_ms_domain(self):
|
||||
"""
|
||||
Test that overriding a tensor to 16bit (when default is 8bit) no longer automatically
|
||||
sets the 'com.microsoft' domain on DQ and Q ops for opset >= 21.
|
||||
Before ONNX 1.16.0, we had to use the 'com.microsoft' domain to be able to use 16-bit quantization.
|
||||
"""
|
||||
qdq_model_name = "model_quant_overrides_to_16bit.onnx"
|
||||
inp_zp, _, sig_out_zp, _, _, _, _, _, out_zp, _ = self.perform_qdq_quantization(
|
||||
qdq_model_name,
|
||||
activation_type=onnx.TensorProto.UINT8, # Default to 8bit activations
|
||||
extra_options={
|
||||
"TensorQuantOverrides": {
|
||||
"INP": [{"quant_type": QuantType.QUInt16}],
|
||||
"SIG_OUT": [{"quant_type": QuantType.QUInt16}],
|
||||
}
|
||||
},
|
||||
opset=21,
|
||||
)
|
||||
|
||||
# Input and Sigmoid's output should be overridden to 16bit
|
||||
self.assertEqual(inp_zp.data_type, onnx.TensorProto.UINT16)
|
||||
self.assertEqual(sig_out_zp.data_type, onnx.TensorProto.UINT16)
|
||||
|
||||
# Output should be the default uint8 type
|
||||
self.assertEqual(out_zp.data_type, onnx.TensorProto.UINT8)
|
||||
|
||||
# Q/DQ ops should NOT have the 'com.microsoft' domain (opset >= 21 supports 16-bit natively)
|
||||
qdq_model = onnx.load_model(qdq_model_name)
|
||||
for node in qdq_model.graph.node:
|
||||
if node.op_type in {"QuantizeLinear", "DequantizeLinear"}:
|
||||
self.assertNotEqual(node.domain, ms_domain)
|
||||
|
||||
def test_override_validation_nonexisting_tensor(self):
|
||||
"""
|
||||
Test that specifying a non-existing tensor should fail.
|
||||
|
|
|
|||
Loading…
Reference in a new issue