MaxPool versioning in quantization tools. (#5194)

MaxPool versioning in quantization tools.
2026-06-07 00:13:17 +00:00 · 2020-09-16 22:52:24 -07:00 · 2020-09-16 22:52:24 -07:00 · 498483b464
commit 498483b464
parent 39a7f96a44
2 changed files with 8 additions and 2 deletions
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -93,7 +93,7 @@ class ONNXQuantizer:
        self.op_types_to_quantize = op_types_to_quantize
        self.new_nodes = []

-        self.check_opset_version()
+        self.opset_version = self.check_opset_version()

        if not self.mode in quantization_modes:
            raise ValueError('unsupported quantization mode {}'.format(self.mode))
@@ -124,7 +124,7 @@ class ONNXQuantizer:
            print(
                "Warning: The original model opset version is {}, which does not support node fusions. Please update the model to opset >= 11 for better performance."
                .format(opset_version))
-            return
+            return 10

        if opset_version < 10:
            print(
@@ -132,8 +132,10 @@ class ONNXQuantizer:
                .format(opset_version))
            self.model.model.opset_import.remove(ai_onnx_domain[0])
            self.model.model.opset_import.extend([onnx.helper.make_opsetid("", 11)])
+            opset_version = 11
        
        self.fuse_dynamic_quant = True
+        return opset_version

    def replace_gemm_with_matmul(self):
        nodes_to_remove = []
--- a/onnxruntime/python/tools/quantization/operators/maxpool.py
+++ b/onnxruntime/python/tools/quantization/operators/maxpool.py
@@ -12,6 +12,10 @@ class QMaxPool(QuantOperatorBase):
        node = self.node
        assert (node.op_type == "MaxPool")

+        if self.quantizer.opset_version < 12:
+            super().quantize()
+            return
+
        # When mode is QLinearOps, the output quantization params are calculated based on outputs from
        # activation nodes, therefore these nodes can be removed from the graph if they follow a quantized op.
        # If input to this node is not quantized then keep this node