From b000ec91cc3141a43ba7a136a8ce1b2bc82e6341 Mon Sep 17 00:00:00 2001
From: Zhang Lei <zhang.huanning@hotmail.com>
Date: Fri, 10 Dec 2021 16:40:01 -0800
Subject: [PATCH] Add quantization tool and its unittest with s8s8 support
 (#10007)

* Add quantization tool with s8s8 support
  * Add unittest for existing s8s8 support operators
  * Comment out the ready unittests for upcoming s8s8 operators (ConvInteger and Resize)
  * Minor change on quantization tools

* Use a different s8 min value depending on whether it is for a weight or an activation.

* Use the same qmin for reduced-range s8.
---
 .../tools/quantization/operators/concat.py    |  3 +-
 .../python/tools/quantization/quant_utils.py  | 13 ++-
 .../test/python/quantization/op_test_utils.py | 26 ++++-
 .../python/quantization/test_conv_dynamic.py  | 45 ++++++---
 .../python/quantization/test_op_concat.py     | 47 ++++++---
 .../python/quantization/test_op_gavgpool.py   | 48 +++++----
 .../test/python/quantization/test_op_gemm.py  | 98 ++++++++++++-------
 .../python/quantization/test_op_maxpool.py    | 53 ++++++----
 .../test/python/quantization/test_op_pad.py   | 76 +++++++++++---
 .../python/quantization/test_op_pooling.py    | 46 ++++++---
 .../python/quantization/test_op_reshape.py    | 30 ++++--
 .../python/quantization/test_op_resize.py     | 33 +++++--
 .../quantization/test_op_squeeze_unsqueeze.py | 43 +++++---
 .../python/quantization/test_op_transpose.py  | 31 ++++--
 .../test/python/quantization/test_qdq.py      |  2 +-
 15 files changed, 418 insertions(+), 176 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/operators/concat.py b/onnxruntime/python/tools/quantization/operators/concat.py
index ada842c7b6..76c05828e5 100644
--- a/onnxruntime/python/tools/quantization/operators/concat.py
+++ b/onnxruntime/python/tools/quantization/operators/concat.py
@@ -12,7 +12,8 @@ class QLinearConcat(QuantOperatorBase):
 
         data_found, output_scale_name, output_zp_name, _, _ = \
             self.quantizer._get_quantization_params(node.output[0])
-        (q_input_names, zero_point_names, scale_names, nodes) = self.quantizer.quantize_inputs(node, [*range(0, len(node.input))])
+        (q_input_names, zero_point_names, scale_names, nodes) = \
+            self.quantizer.quantize_inputs(node, [*range(0, len(node.input))], initializer_use_weight_qType=False)
         if not data_found or q_input_names is None:
             return super().quantize()
 
diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py
index e57160275f..73dd7fdbf7 100644
--- a/onnxruntime/python/tools/quantization/quant_utils.py
+++ b/onnxruntime/python/tools/quantization/quant_utils.py
@@ -176,7 +176,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False):
     if len(data):
         rmin = min(data)
         rmax = max(data)
-        qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range)
+        qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, for_weight=True)
 
         zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric)
 
@@ -184,7 +184,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False):
 
     return rmin, rmax, zero_point, scale, quantized_data
 
-def get_qmin_qmax_for_qType(qType, reduce_range=False):
+def get_qmin_qmax_for_qType(qType, reduce_range=False, for_weight=False):
     '''
     Return qmin and qmax, the minimum and maximum value representable by the given qType
     :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
@@ -193,18 +193,21 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False):
     if qType == onnx_proto.TensorProto.UINT8:
         (qmin, qmax) = (0,127) if reduce_range else (0,255)
     elif qType == onnx_proto.TensorProto.INT8:
-        (qmin, qmax) = (-64,64) if reduce_range else (-127,127)
+        if for_weight:
+            (qmin, qmax) = (-64,64) if reduce_range else (-127,127)
+        else:
+            (qmin, qmax) = (-64,64) if reduce_range else (-128,127)
     else:
         raise ValueError("Unexpected data type {} requested. Only INT8 and UINT8 are supported.".format(qType))
     return qmin, qmax
 
-def get_qrange_for_qType(qType, reduce_range=False):
+def get_qrange_for_qType(qType, reduce_range=False, for_weight=False):
     '''
     Helper function to get the quantization range for a type.
         parameter qType: quantization type.
         return: quantization range.
     '''
-    qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range)
+    qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, for_weight=for_weight)
     return  qmax - qmin
 
 class QuantizedInitializer:
diff --git a/onnxruntime/test/python/quantization/op_test_utils.py b/onnxruntime/test/python/quantization/op_test_utils.py
index a5aaae027c..a3f0192289 100644
--- a/onnxruntime/test/python/quantization/op_test_utils.py
+++ b/onnxruntime/test/python/quantization/op_test_utils.py
@@ -34,7 +34,6 @@ def InputFeedsNegOneZeroOne(n, name2shape):
     dr = TestDataFeeds(input_data_list)
     return dr
 
-
 def check_op_type_order(testcase, model_to_check, ops):
     if isinstance(model_to_check, string_types):
         model = onnx.load(model_to_check)
@@ -78,3 +77,28 @@ def check_op_nodes(testcase, model_path, node_checker):
     model = onnx.load(Path(model_path))
     for node in model.graph.node:
         testcase.assertTrue(node_checker(node))
+
+def check_qtype_by_node_type(testcase, model_to_check, check_list):
+    if isinstance(model_to_check, string_types):
+        model = onnx.load(model_to_check)
+    elif isinstance(model_to_check, onnx.ModelProto):
+        model = model_to_check
+    model = onnx.shape_inference.infer_shapes(model)
+    value_infos = {vi.name: vi for vi in model.graph.value_info}
+    value_infos.update({ot.name: ot for ot in model.graph.output})
+    value_infos.update({it.name: it for it in model.graph.input})
+    initializers = {init.name : init for init in model.graph.initializer}
+
+    for node in model.graph.node:
+        if node.op_type in check_list:
+            input_output_check_list = check_list[node.op_type]
+            for check_item in input_output_check_list:
+                tensor_name = node.input[check_item[1]] if check_item[0] == 'i' else node.output[check_item[1]]
+                testcase.assertTrue((tensor_name in value_infos) or (tensor_name in initializers))
+                if tensor_name in value_infos:
+                    vi = value_infos[tensor_name]
+                    testcase.assertTrue(vi.type.HasField('tensor_type'))
+                    testcase.assertTrue(vi.type.tensor_type.elem_type == check_item[2])
+                else: #if (tensor_name in initializers):
+                    init = initializers[tensor_name]
+                    testcase.assertTrue(init.data_type == check_item[2])
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
index a95ac9b50f..6a54081979 100644
--- a/onnxruntime/test/python/quantization/test_conv_dynamic.py
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -11,17 +11,18 @@ import onnx
 import onnxruntime
 import numpy as np
 from onnx import helper, TensorProto, numpy_helper
-from onnxruntime.quantization import quantize_dynamic
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
+from onnxruntime.quantization import quantize_dynamic, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order, check_qtype_by_node_type
 
 
 def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
-  '''
-  Helper function to generate initializers for test inputs
-  '''
-  tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
-  init = numpy_helper.from_array(tensor, input_name)
-  return init
+    '''
+    Helper function to generate initializers for test inputs
+    '''
+    tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
+    init = numpy_helper.from_array(tensor, input_name)
+    return init
+
 
 class TestONNXModel(unittest.TestCase):
     def construct_model(self, model_path):
@@ -52,19 +53,31 @@ class TestONNXModel(unittest.TestCase):
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
         onnx.save(model, model_path)
 
-    def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
-        quantize_dynamic(model_fp32_path, model_int8_path)
-        quant_nodes = {'ConvInteger' : 2}
+    def dynamic_quant_conv_test(self, activation_type, weight_type, extra_options={}):
+        np.random.seed(1)
+        model_fp32_path = 'conv_bias.fp32.onnx'
+        self.construct_model(model_fp32_path)
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_int8_path = 'conv_bias.quant.{}{}.onnx'.format(activation_type_str, weight_type_str)
+
+        quantize_dynamic(model_fp32_path, model_int8_path,
+                         activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+        quant_nodes = {'ConvInteger': 2}
         check_op_type_count(self, model_int8_path, **quant_nodes)
+        qnode_io_qtypes = {'ConvInteger': [['i', 2, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
         check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
 
     def test_quant_conv(self):
-        np.random.seed(1)
-        model_fp32_path = 'conv_bias.fp32.onnx'
-        model_int8_path = 'conv_bias.quant.onnx'
-        self.construct_model(model_fp32_path)
+        self.dynamic_quant_conv_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
+
+    # TODO: uncomment the following once ConvInteger supports s8
+    # def test_quant_conv_s8s8(self):
+    #    self.dynamic_quant_conv_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
-        self.dynamic_quant_conv(model_fp32_path, model_int8_path)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_concat.py b/onnxruntime/test/python/quantization/test_op_concat.py
index f7952d4c50..01291df157 100644
--- a/onnxruntime/test/python/quantization/test_op_concat.py
+++ b/onnxruntime/test/python/quantization/test_op_concat.py
@@ -7,8 +7,8 @@
 import unittest
 import numpy as np
 from onnx import helper, TensorProto, numpy_helper, save
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import InputFeedsNegOneZeroOne, check_model_correctness, check_op_type_count
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import InputFeedsNegOneZeroOne, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 
 class TestONNXModel(unittest.TestCase):
@@ -47,7 +47,7 @@ class TestONNXModel(unittest.TestCase):
         conv3_node = helper.make_node('Conv', ['input', 'conv3_weight'], ['conv3_output'], name='conv3_node')
 
         concat_node = helper.make_node('Concat', ['conv1_output', 'conv2_output', 'conv3_output'], [
-                                            'concat_output'], name='concat_node', axis=1)
+            'concat_output'], name='concat_node', axis=1)
 
         identity_node = helper.make_node('Identity', ['concat_output'], ['output'], name='identity_node')
 
@@ -57,31 +57,48 @@ class TestONNXModel(unittest.TestCase):
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
         save(model, model_path)
 
-    def test_quantize_concat(self):
+    def quantize_concat_test(self, activation_type, weight_type, extra_options={}):
         np.random.seed(1)
-
         model_fp32_path = 'concat_fp32.onnx'
-        model_uint8_path = 'concat_uint8.onnx'
-        model_uint8_qdq_path = 'concat_uint8_qdq.onnx'
-
         self.construct_model(model_fp32_path)
+        data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_q8_path = 'concat_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_q8_qdq_path = 'concat_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
 
         # Verify QOperator mode
-        data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
+        data_reader.rewind()
+        quantize_static(model_fp32_path, model_q8_path, data_reader,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
 
         qnode_counts = {'QLinearConv': 3, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'QLinearConcat': 1}
-        check_op_type_count(self, model_uint8_path, **qnode_counts)
+        check_op_type_count(self, model_q8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'QLinearConcat': [['i', 1, activation_proto_qtype], [
+                               'i', 4, activation_proto_qtype], ['i', 7, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
+        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
         qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 5, 'DequantizeLinear': 8, 'Concat': 1}
-        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
+        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
+
+    def test_quantize_concat(self):
+        self.quantize_concat_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
+
+    def test_quantize_concat_s8s8(self):
+        self.quantize_concat_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
 
 if __name__ == '__main__':
diff --git a/onnxruntime/test/python/quantization/test_op_gavgpool.py b/onnxruntime/test/python/quantization/test_op_gavgpool.py
index 7e89758c99..abf739aa33 100644
--- a/onnxruntime/test/python/quantization/test_op_gavgpool.py
+++ b/onnxruntime/test/python/quantization/test_op_gavgpool.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, quantize_dynamic
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 
 class TestOpGlobalAveragePool(unittest.TestCase):
@@ -44,10 +44,10 @@ class TestOpGlobalAveragePool(unittest.TestCase):
         output_name = 'output'
         initializers = []
 
-        #make 1st GlobalAveragePool node
+        # make 1st GlobalAveragePool node
         gavgpool_node_1 = onnx.helper.make_node('GlobalAveragePool', [input_name], [expand_input])
 
-        #make Expand node
+        # make Expand node
         expand_shape_name = 'expand_shape'
         initializers.append(onnx.numpy_helper.from_array(np.array(input_shape, dtype=np.int64), name=expand_shape_name))
         expand_node = onnx.helper.make_node('Expand', [expand_input, expand_shape_name], [conv_input])
@@ -59,7 +59,7 @@ class TestOpGlobalAveragePool(unittest.TestCase):
         initializers.append(onnx.numpy_helper.from_array(conv_weight_data, name=weight_name))
         conv_node = onnx.helper.make_node('Conv', [conv_input, weight_name], [gavgpool_input_2nd], name=conv_name)
 
-        #make 1st GlobalAveragePool node
+        # make 2nd GlobalAveragePool node
         gavgpool_node_2 = onnx.helper.make_node('GlobalAveragePool', [gavgpool_input_2nd], [output_name])
 
         # make graph
@@ -69,30 +69,42 @@ class TestOpGlobalAveragePool(unittest.TestCase):
         graph = helper.make_graph([gavgpool_node_1, expand_node, conv_node, gavgpool_node_2], graph_name,
                                   [input_tensor], [output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
 
         onnx.save(model, output_model_path)
 
-    def test_quantize_reshape(self):
+    def quantize_gavgpool_test(self, activation_type, weight_type, extra_options={}):
         np.random.seed(1)
         model_fp32_path = 'gavg_pool_fp32.onnx'
-        model_int8_path = 'gavg_pool_fp32.quant.onnx'
         data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
         self.construct_model_gavgpool(model_fp32_path,
                                       [1, 8, 33, 33],
                                       [16, 8, 3, 3],
                                       [1, 16, 1, 1])
-        quantize_static(model_fp32_path,
-                        model_int8_path,
-                        data_reader)
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_q8_path = 'gavg_pool_{}{}.onnx'.format(activation_type_str, weight_type_str)
+
         data_reader.rewind()
-        quant_nodes = {'QLinearConv' : 1,
-                       'GlobalAveragePool' : 1,
-                       'QLinearGlobalAveragePool' : 1,
-                       'QuantizeLinear' : 1,
-                       'DequantizeLinear' : 1}
-        check_op_type_count(self, model_int8_path, **quant_nodes)
-        check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
+        quantize_static(model_fp32_path, model_q8_path, data_reader,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+
+        quant_nodes = {'QLinearConv': 1, 'GlobalAveragePool': 1, 'QLinearGlobalAveragePool': 1,
+                       'QuantizeLinear': 1, 'DequantizeLinear': 1}
+        check_op_type_count(self, model_q8_path, **quant_nodes)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'QLinearGlobalAveragePool': [['i', 2, activation_proto_qtype], ['i', 4, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
+        data_reader.rewind()
+        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
+
+    def test_quantize_gavgpool(self):
+        self.quantize_gavgpool_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
+
+    def test_quantize_gavgpool_s8s8(self):
+        self.quantize_gavgpool_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
 
 if __name__ == '__main__':
diff --git a/onnxruntime/test/python/quantization/test_op_gemm.py b/onnxruntime/test/python/quantization/test_op_gemm.py
index e41ee633e2..cf61402fa5 100644
--- a/onnxruntime/test/python/quantization/test_op_gemm.py
+++ b/onnxruntime/test/python/quantization/test_op_gemm.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 
 class TestOpGEMM(unittest.TestCase):
@@ -46,12 +46,12 @@ class TestOpGEMM(unittest.TestCase):
             bias_data = np.random.normal(0, 0.1, bias_shape).astype(np.float32)
             initializers.append(onnx.numpy_helper.from_array(bias_data, name=bias_name))
 
-            return onnx.helper.make_node('Gemm', [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB = 1)
+            return onnx.helper.make_node('Gemm', [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1)
         # make gemm1 node
         gemm1_output_name = "gemm1_output"
         gemm1_node = make_gemm(input_name, [100, 10], 'linear1.weight', [100], 'linear1.bias', gemm1_output_name)
 
-        #make Clip
+        # make Clip
         clip_min_name = 'clip_min'
         clip_max_name = 'clip_max'
         clip_output_name = 'clip_output'
@@ -71,7 +71,7 @@ class TestOpGEMM(unittest.TestCase):
         graph = helper.make_graph([gemm1_node, clip_node, gemm2_node], graph_name,
                                   [input_tensor], [output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
 
         onnx.save(model, output_model_path)
 
@@ -121,55 +121,82 @@ class TestOpGEMM(unittest.TestCase):
 
         onnx.save(model, output_model_path)
 
-    def static_quant_test(self, model_fp32_path, model_int8_path):
-        data_reader = self.input_feeds(1, {'input': [5, 10]})
-        quantize_static(model_fp32_path,
-                        model_int8_path,
-                        data_reader)
+    def static_quant_test(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_int8_path = 'gemm_fp32.quant_{}{}.onnx'.format(activation_type_str, weight_type_str)
+
         data_reader.rewind()
-        quant_nodes = {'QLinearMatMul' : 2,
-                       'QLinearAdd' : 2,
-                       'QuantizeLinear' : 1,
-                       'DequantizeLinear' : 1}
+        quantize_static(model_fp32_path, model_int8_path, data_reader,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+        quant_nodes = {'QLinearMatMul': 2, 'QLinearAdd': 2, 'QuantizeLinear': 1, 'DequantizeLinear': 1}
         check_op_type_count(self, model_int8_path, **quant_nodes)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
+        data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
 
-    def static_quant_test_qdq(self, model_fp32_path, model_int8_path):
-        data_reader = self.input_feeds(1, {'input': [5, 10]})
-        quantize_static(model_fp32_path,
-                        model_int8_path,
-                        data_reader,
-                        quant_format=QuantFormat.QDQ)
+    def static_quant_test_qdq(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_int8_path = 'gemm_fp32.quant_dqd_{}{}.onnx'.format(activation_type_str, weight_type_str)
+
         data_reader.rewind()
-        quant_nodes = {'MatMul' : 2,
-                       'Add' : 2,
-                       'QuantizeLinear' : 5,
-                       'DequantizeLinear' : 9}
+        quantize_static(model_fp32_path, model_int8_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+        quant_nodes = {'MatMul': 2, 'Add': 2, 'QuantizeLinear': 5, 'DequantizeLinear': 9}
         check_op_type_count(self, model_int8_path, **quant_nodes)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
+        data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
 
+    def dynamic_quant_test(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_int8_path = 'gemm_fp32.quant_dynamic_{}{}.onnx'.format(activation_type_str, weight_type_str)
 
-    def dynamic_quant_test(self, model_fp32_path, model_int8_path):
-        quantize_dynamic(model_fp32_path, model_int8_path)
-        quant_nodes = {'MatMulInteger' : 2}
+        quantize_dynamic(model_fp32_path, model_int8_path,
+                         activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+        quant_nodes = {'MatMulInteger': 2}
         check_op_type_count(self, model_int8_path, **quant_nodes)
-        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(5,10).astype(np.float32)})
+        qnode_io_qtypes = {'MatMulInteger': [['i', 2, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
+        data_reader.rewind()
+        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(5, 10).astype(np.float32)})
 
     def dynamic_attention_quant_test(self, model_fp32_path, model_int8_path, per_channel, reduce_range):
         quantize_dynamic(model_fp32_path, model_int8_path, per_channel=per_channel, reduce_range=reduce_range)
-        quant_nodes = {'QAttention' : 1, 'MatMulInteger' : 1}
+        quant_nodes = {'QAttention': 1, 'MatMulInteger': 1}
         check_op_type_count(self, model_int8_path, **quant_nodes)
-        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(1,5,10).astype(np.float32)})
+        check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(1, 5, 10).astype(np.float32)})
 
-    def test_quantize_reshape(self):
+    def test_quantize_gemm(self):
         np.random.seed(1)
         model_fp32_path = 'gemm_fp32.onnx'
-        model_int8_path = 'gemm_fp32.quant.onnx'
         self.construct_model_gemm(model_fp32_path)
+        data_reader = self.input_feeds(1, {'input': [5, 10]})
 
-        self.static_quant_test(model_fp32_path, model_int8_path)
-        self.static_quant_test_qdq(model_fp32_path, model_int8_path)
-        self.dynamic_quant_test(model_fp32_path, model_int8_path)
+        self.static_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
+        self.static_quant_test_qdq(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
+        self.dynamic_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
+
+    def test_quantize_gemm_s8s8(self):
+        np.random.seed(1)
+        model_fp32_path = 'gemm_fp32.onnx'
+        self.construct_model_gemm(model_fp32_path)
+        data_reader = self.input_feeds(1, {'input': [5, 10]})
+
+        self.static_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
+                               extra_options={'ActivationSymmetric': True})
+        self.static_quant_test_qdq(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
+                                   extra_options={'ActivationSymmetric': True})
+        self.dynamic_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
+                                extra_options={'ActivationSymmetric': True})
 
     def test_quantize_attention(self):
         np.random.seed(1)
@@ -182,5 +209,6 @@ class TestOpGEMM(unittest.TestCase):
         self.dynamic_attention_quant_test(model_fp32_path, model_int8_path, False, True)
         self.dynamic_attention_quant_test(model_fp32_path, model_int8_path, False, False)
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_maxpool.py b/onnxruntime/test/python/quantization/test_op_maxpool.py
index 9a891f10b9..f474c9a547 100644
--- a/onnxruntime/test/python/quantization/test_op_maxpool.py
+++ b/onnxruntime/test/python/quantization/test_op_maxpool.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
 
 
 class TestOpMaxPool(unittest.TestCase):
@@ -54,40 +54,59 @@ class TestOpMaxPool(unittest.TestCase):
         graph = helper.make_graph([conv_node, identity_node, maxpool_node], 'TestOpQuantizerMaxPool_test_model',
                                   [input_tensor], [identity_out, output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 14)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def test_quantize_maxpool(self):
+    def quantize_maxpool_test(self, activation_type, weight_type, extra_options={}):
+        """Statically quantize the Conv+MaxPool test model and check the QOperator and QDQ results.
+
+        Output file names embed a 'u8'/'s8' tag for the activation and weight types.
+        """
         np.random.seed(1)
-
         model_fp32_path = 'maxpool_fp32.onnx'
-        model_uint8_path = 'maxpool_uint8.onnx'
-        model_uint8_qdq_path = 'maxpool_uint8_qdq.onnx'
-
         self.construct_model_conv_maxpool(model_fp32_path,
                                           [1, 2, 26, 42], [3, 2, 3, 3],
                                           [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                           [1, 3, 22, 38])
+        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_q8_path = 'maxpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_q8_qdq_path = 'maxpool_qdq_{}{}.onnx'.format(activation_type_str, weight_type_str)
 
         # Verify QOperator mode
-        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
-
-        # make sure maxpool become xint8 operator, its input name could tell that
-        check_op_nodes(self, model_uint8_path, lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
-        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'MaxPool': 1}
-        check_op_type_count(self, model_uint8_path, **qnode_counts)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
+        quantize_static(model_fp32_path, model_q8_path, data_reader,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
+        # make sure maxpool become xint8 operator, its input name could tell that
+        check_op_nodes(self, model_q8_path, lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
+        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'MaxPool': 1}
+        check_op_type_count(self, model_q8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
+        data_reader.rewind()
+        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
         qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'MaxPool': 1}
-        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
+        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
 
+    def test_quantize_maxpool(self):
+        self.quantize_maxpool_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
+
+    def test_quantize_maxpool_s8s8(self):
+        self.quantize_maxpool_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py
index f83d97757e..b34996a9b8 100644
--- a/onnxruntime/test/python/quantization/test_op_pad.py
+++ b/onnxruntime/test/python/quantization/test_op_pad.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, quantize_dynamic
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 
 class TestOpQuatizerPad(unittest.TestCase):
@@ -51,7 +51,7 @@ class TestOpQuatizerPad(unittest.TestCase):
         graph = helper.make_graph([pad_node], 'TestOpQuantizerPad_test_model',
                                   [input_tensor], [output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
 
         onnx.save(model, output_model_path)
 
@@ -91,14 +91,17 @@ class TestOpQuatizerPad(unittest.TestCase):
         graph = helper.make_graph([conv_node, identity_node, pad_node], 'TestOpQuantizerPad_test_model',
                                   [input_tensor], [identity_out, output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def quantize_model(self, model_fp32_path, model_i8_path, data_reader=None):
+    def quantize_model(self, model_fp32_path, model_i8_path, data_reader=None,
+                       activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options={}):
         if data_reader is not None:
-            quantize_static(model_fp32_path, model_i8_path, data_reader, reduce_range=True)
+            quantize_static(model_fp32_path, model_i8_path, data_reader, reduce_range=True,
+                            activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
         else:
-            quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True)
+            quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True,
+                             activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
 
     def verify_should_not_trigger(self, quantize_mode='static'):
         np.random.seed(108)
@@ -118,23 +121,39 @@ class TestOpQuatizerPad(unittest.TestCase):
     def test_dynamic_quantize_no_trigger(self):
         self.verify_should_not_trigger(quantize_mode='dynamic')
 
-    def verify_quantize_with_pad_mode(self, pad_mode, constant_value=None, quantize_mode='static'):
+    def verify_quantize_with_pad_mode(self, pad_mode, constant_value=None, quantize_mode='static', rtol=0.01, atol=0.05,
+                                      activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options={}):
         np.random.seed(108)
         tag_pad_mode = pad_mode if pad_mode is not None else 'none'
         tag_constant_value = '' if constant_value is None else '_value'
         model_fp32_path = 'qop_pad_{}_fp32_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value)
-        model_i8_path = 'qop_pad_{}_i8_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value)
         data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
         self.construct_model_conv_pad(model_fp32_path, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31],
                                       pad_mode, [0, 0, 1, 2, 0, 0, 3, 4], constant_value=constant_value)
-        self.quantize_model(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader)
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_i8_path = 'qop_pad_{}_i8_{}{}_{}{}.onnx'.format(
+            quantize_mode, tag_pad_mode, tag_constant_value, activation_type_str, weight_type_str)
         data_reader.rewind()
+        self.quantize_model(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader,
+                            activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
         # DequantizeLinear=2 means there are one DequantizeLinear Node aftr both conv and pad,
         # which means pad node is running in quantized semantic.
         # In dynamic quantize mode, pad operator in fact not quantized as input is fp32.
-        kwargs = {'DynamicQuantizeLinear': 1} if quantize_mode != 'static' else {'DequantizeLinear': 2, 'QuantizeLinear': 1}
+        if quantize_mode != 'static':
+            kwargs = {'DynamicQuantizeLinear': 1} if activation_type == QuantType.QUInt8 else {'QuantizeLinear': 1}
+        else:
+            kwargs = {'DequantizeLinear': 2, 'QuantizeLinear': 1}
         check_op_type_count(self, model_i8_path, **kwargs)
-        check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next())
+        # check node input/output type if such node exists in the graph
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
+        qnode_io_qtypes.update({'ConvInteger': [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
+        data_reader.rewind()
+        check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next(), rtol=rtol, atol=atol)
 
     def test_static_mode_edge(self):
         self.verify_quantize_with_pad_mode('edge', constant_value=None)
@@ -148,6 +167,22 @@ class TestOpQuatizerPad(unittest.TestCase):
     def test_static_mode_constant_value(self):
         self.verify_quantize_with_pad_mode('constant', constant_value=3.75)
 
+    def test_static_mode_edge_s8s8(self):
+        self.verify_quantize_with_pad_mode('edge', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
+                                           weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    def test_static_mode_reflect_s8s8(self):
+        self.verify_quantize_with_pad_mode('reflect', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
+                                           weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    def test_static_mode_constant_default_s8s8(self):
+        self.verify_quantize_with_pad_mode('constant', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
+                                           weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    def test_static_mode_constant_value_s8s8(self):
+        self.verify_quantize_with_pad_mode('constant', constant_value=3.75, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
+                                           weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
     def test_dynamic_mode_edge(self):
         self.verify_quantize_with_pad_mode('edge', constant_value=None, quantize_mode='dynamic')
 
@@ -160,6 +195,23 @@ class TestOpQuatizerPad(unittest.TestCase):
     def test_dynamic_mode_constant_value(self):
         self.verify_quantize_with_pad_mode('constant', constant_value=3.75, quantize_mode='dynamic')
 
+    # TODO: uncomment following after ConvInteger s8 supported
+    # def test_dynamic_mode_edge_s8s8(self):
+    #     self.verify_quantize_with_pad_mode('edge', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
+    #                                        weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    # def test_dynamic_mode_reflect_s8s8(self):
+    #     self.verify_quantize_with_pad_mode('reflect', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
+    #                                        weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    # def test_dynamic_mode_constant_default_s8s8(self):
+    #     self.verify_quantize_with_pad_mode('constant', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
+    #                                        weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+    # def test_dynamic_mode_constant_value_s8s8(self):
+    #     self.verify_quantize_with_pad_mode('constant', constant_value=3.75, quantize_mode='dynamic', activation_type=QuantType.QInt8,
+    #                                        weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_pooling.py b/onnxruntime/test/python/quantization/test_op_pooling.py
index bb40ff9a8f..ee2bff4e02 100644
--- a/onnxruntime/test/python/quantization/test_op_pooling.py
+++ b/onnxruntime/test/python/quantization/test_op_pooling.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
 
 
 class TestOpAveragePool(unittest.TestCase):
@@ -57,34 +57,50 @@ class TestOpAveragePool(unittest.TestCase):
         model.ir_version = 7 # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def test_quantize_avgpool(self):
+    def quantize_avgpool_test(self, activation_type, weight_type, extra_options = {}):
         np.random.seed(1)
-
         model_fp32_path = 'avgpool_fp32.onnx'
-        model_uint8_path = 'avgpool_uint8.onnx'
-        model_uint8_qdq_path = 'avgpool_uint8_qdq.onnx'
-
         self.construct_model_conv_avgpool(model_fp32_path,
                                           [1, 2, 26, 42], [3, 2, 3, 3],
                                           [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                           [1, 3, 22, 38])
+        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
+
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_q8_path = 'avgpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_q8_qdq_path = 'avgpool_qdq_{}{}.onnx'.format(activation_type_str, weight_type_str)
 
         # Verify QOperator mode
-        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
-        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'QLinearAveragePool': 1}
-        check_op_type_count(self, model_uint8_path, **qnode_counts)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
+        quantize_static(model_fp32_path, model_q8_path, data_reader,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
+        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'QLinearAveragePool': 1}
+        check_op_type_count(self, model_q8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'QLinearConv' : [['i', 2, activation_proto_qtype], ['i', 7, activation_proto_qtype], ['o', 0, activation_proto_qtype]]})
+        qnode_io_qtypes.update({'QLinearAveragePool' : [['i', 4, activation_proto_qtype]]}) # shape info not working on custom ops
+        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
+        data_reader.rewind()
+        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
         qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'AveragePool': 1}
-        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
+        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
 
+    def test_quantize_avgpool(self):
+        self.quantize_avgpool_test(QuantType.QUInt8, QuantType.QUInt8)
+
+    def test_quantize_avgpool_s8s8(self):
+        self.quantize_avgpool_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_reshape.py b/onnxruntime/test/python/quantization/test_op_reshape.py
index a457a21570..0397d1a9c2 100644
--- a/onnxruntime/test/python/quantization/test_op_reshape.py
+++ b/onnxruntime/test/python/quantization/test_op_reshape.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
 
 
 class TestOpReshape(unittest.TestCase):
@@ -67,35 +67,51 @@ class TestOpReshape(unittest.TestCase):
 
         onnx.save(model, output_model_path)
 
-    def test_quantize_reshape(self):
+    def quantize_reshape_test(self, activation_type, weight_type, extra_options = {}):
         np.random.seed(1)
         model_fp32_path = 'reshape_fp32.onnx'
-        model_uint8_path = 'reshape_uint8.onnx'
-        model_uint8_qdq_path = 'reshape_uint8_qdq.onnx'
 
         self.construct_model_matmul_reshape(model_fp32_path,
                                             [3, 7],
                                             [7, 3],
                                             [1, 9])
 
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_uint8_path = 'reshape_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_uint8_qdq_path = 'reshape_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
+
         # Verify QOperator mode
         data_reader = self.input_feeds(1, {'input': [3, 7]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
+        quantize_static(model_fp32_path, model_uint8_path, data_reader,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
         # make sure transpose become xint8 operator, its input name could tell that
         check_op_nodes(self, model_uint8_path, lambda node: (node.name != "reshape_node" or node.input[0] != 'matmul_output'))
         qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Reshape': 1}
         check_op_type_count(self, model_uint8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear' : [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
         qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Reshape': 1}
         check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
 
+    def test_quantize_reshape(self):
+        self.quantize_reshape_test(QuantType.QUInt8, QuantType.QUInt8)
+
+    def test_quantize_reshape_s8s8(self):
+        self.quantize_reshape_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_resize.py b/onnxruntime/test/python/quantization/test_op_resize.py
index 66724c38a1..df9371c2c0 100644
--- a/onnxruntime/test/python/quantization/test_op_resize.py
+++ b/onnxruntime/test/python/quantization/test_op_resize.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
 
 
 class TestOpResize(unittest.TestCase):
@@ -81,12 +81,9 @@ class TestOpResize(unittest.TestCase):
         model.ir_version = 7 # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def test_quantize_resize(self):
+    def quantize_resize_test(self, activation_type, weight_type, extra_options = {}):
         np.random.seed(1)
-
         model_fp32_path = 'resize_fp32.onnx'
-        model_uint8_path = 'resize_uint8.onnx'
-        model_uint8_qdq_path = 'resize_uint8_qdq.onnx'
 
         kwargs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'}
         self.construct_model_conv_resize(model_fp32_path,
@@ -95,25 +92,43 @@ class TestOpResize(unittest.TestCase):
                                          kwargs,
                                          [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0], None)
 
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_uint8_path = 'resize_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_uint8_qdq_path = 'resize_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
+
         # Verify QOperator mode
         data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
-
+        quantize_static(model_fp32_path, model_uint8_path, data_reader,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
         # make sure resize become xint8 operator, its input name could tell that
         check_op_nodes(self, model_uint8_path, lambda node: (node.name != "resize_node" or node.input[0] != 'conv_output'))
         qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'Resize': 1}
         check_op_type_count(self, model_uint8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        qnode_io_qtypes.update({'DequantizeLinear' : [['i', 2, activation_proto_qtype]]})
+        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
         qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Resize': 1}
         check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
 
+    def test_quantize_resize(self):
+        self.quantize_resize_test(QuantType.QUInt8, QuantType.QUInt8)
+
+    # TODO: Uncomment following after resize s8 support is enabled
+    # def test_quantize_resize_s8s8(self):
+    #     self.quantize_resize_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_squeeze_unsqueeze.py b/onnxruntime/test/python/quantization/test_op_squeeze_unsqueeze.py
index 579dba9c9c..18252ec7a1 100644
--- a/onnxruntime/test/python/quantization/test_op_squeeze_unsqueeze.py
+++ b/onnxruntime/test/python/quantization/test_op_squeeze_unsqueeze.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 
 class TestOpSqueezeUnsqueeze(unittest.TestCase):
@@ -26,8 +26,8 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
         return dr
 
     def construct_model_conv_squeezes(self, output_model_path,
-                                     conv_input_shape, conv_weight_shape, conv_output_shape,
-                                     opset = 13):
+                                      conv_input_shape, conv_weight_shape, conv_output_shape,
+                                      opset=13):
         #             (input)
         #            /   |     \
         #         Conv1 conv2    conv3
@@ -55,7 +55,6 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
         conv3_weight_initializer = onnx.numpy_helper.from_array(conv3_weight_arr, name='conv3_weight')
         conv3_node = onnx.helper.make_node('Conv', ['input', 'conv3_weight'], ['conv3_output'], name='conv3_node')
 
-
         if (opset >= 13):
             squeeze_axes_initializer = onnx.numpy_helper.from_array(np.array([0], dtype=np.int64), name='squeeze_axes')
             squeeze1_node = helper.make_node('Squeeze', ['conv1_output', 'squeeze_axes'], ['squeeze1_output'], name='suqeeze1_node')
@@ -66,9 +65,10 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
 
         add1_node = helper.make_node('Add', ['squeeze1_output', 'squeeze2_output'], ['add1_output'], name='add1_node')
         if (opset >= 13):
-            unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output', 'squeeze_axes'], ['unsqueeze_output'], name = 'unsqueeze_node')
+            unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output', 'squeeze_axes'], [
+                                              'unsqueeze_output'], name='unsqueeze_node')
         else:
-            unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output'], ['unsqueeze_output'], name = 'unsqueeze_node', axes=[0])
+            unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output'], ['unsqueeze_output'], name='unsqueeze_node', axes=[0])
 
         output_tensor = helper.make_tensor_value_info('output', TensorProto.FLOAT, conv_output_shape)
         add2_node = helper.make_node('Add', ['unsqueeze_output', 'conv3_output'], ['output'], name='add2_node')
@@ -79,33 +79,43 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
         graph = helper.make_graph([conv1_node, conv2_node, conv3_node, squeeze1_node, squeeze2_node, add1_node, unsqueeze_node, add2_node],
                                   'TestOpSuqeezes_test_model', [input_tensor], [output_tensor], initializer=initializers)
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", opset)])
-        model.ir_version = 7 # use stable onnx ir version
+        model.ir_version = 7  # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def run_quantize_squeezes_of_opset(self, opset = 13):
+    def run_quantize_squeezes_of_opset(self, opset=13, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options=None):  # QOperator + QDQ checks for one opset/type combo
         np.random.seed(1)
 
         model_fp32_path = 'squeezes_opset{}_fp32.onnx'.format(opset)
-        model_uint8_path = 'squeezes_opset{}_uint8.onnx'.format(opset)
-        model_uint8_qdq_path = 'squeezes_opset{}_uint8_qdq.onnx'.format(opset)
-
         self.construct_model_conv_squeezes(model_fp32_path, [1, 2, 26, 42], [3, 2, 3, 3], [1, 3, 24, 40], opset=opset)
 
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8  # dtype asserted on Q/DQ nodes below
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_uint8_path = 'squeezes_opset{}_{}{}.onnx'.format(opset, activation_type_str, weight_type_str)
+        model_uint8_qdq_path = 'squeezes_opset{}_{}{}_qdq.onnx'.format(opset, activation_type_str, weight_type_str)
+
         # Verify QOperator mode
         data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
+        quantize_static(model_fp32_path, model_uint8_path, data_reader, activation_type=activation_type,
+                        weight_type=weight_type, extra_options=extra_options or {})  # None default: no dict shared across calls
 
         # make sure squeezes become xint8 operator, its input name could tell that
         qnode_counts = {'QuantizeLinear': 1, 'DequantizeLinear': 1}
         check_op_type_count(self, model_uint8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]],
+                           'DequantizeLinear': [['i', 2, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)  # NOTE(review): entries look like [i/o flag, index, dtype] - confirm in op_test_utils
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next(), rtol=0.01, atol=0.5)
 
         # Verify QDQ mode
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options or {})
         qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 9, 'DequantizeLinear': 12}
         check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next(), rtol=0.01, atol=0.5)
 
@@ -113,5 +123,10 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
         self.run_quantize_squeezes_of_opset(11)
         self.run_quantize_squeezes_of_opset(13)
 
+    def test_quantize_squeeze_unsqueeze_s8s8(self):
+        for opset in (11, 13):  # one opset on each side of the model builder's `opset >= 13` branch
+            self.run_quantize_squeezes_of_opset(opset, QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_transpose.py b/onnxruntime/test/python/quantization/test_op_transpose.py
index f1dd8a780e..a83ec6022d 100644
--- a/onnxruntime/test/python/quantization/test_op_transpose.py
+++ b/onnxruntime/test/python/quantization/test_op_transpose.py
@@ -10,8 +10,8 @@ import unittest
 import onnx
 import numpy as np
 from onnx import helper, TensorProto
-from onnxruntime.quantization import quantize_static, QuantFormat
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
+from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
 
 
 class TestOpTranspose(unittest.TestCase):
@@ -62,32 +62,47 @@ class TestOpTranspose(unittest.TestCase):
 
         onnx.save(model, output_model_path)
 
-    def test_quantize_transpose(self):
+    def quantize_transpose_test(self, activation_type, weight_type, extra_options=None):  # shared body: quantize in QOperator and QDQ formats, then verify
         np.random.seed(1)
         model_fp32_path = 'transpose_fp32.onnx'
-        model_uint8_path = 'transpose_uint8.onnx'
-        model_uint8_qdq_path = 'transpose_uint8_qdq.onnx'
-
         self.construct_model_matmul_transpose(model_fp32_path, [3, 7], [7, 5], [5, 3])
 
+        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8  # dtype asserted on Q/DQ nodes below
+        activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
+        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
+        model_uint8_path = 'transpose_{}{}.onnx'.format(activation_type_str, weight_type_str)
+        model_uint8_qdq_path = 'transpose_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
+
         # Verify QOperator model
         data_reader = self.input_feeds(1, {'input': [3, 7]})
-        quantize_static(model_fp32_path, model_uint8_path, data_reader)
+        quantize_static(model_fp32_path, model_uint8_path, data_reader, activation_type=activation_type,
+                        weight_type=weight_type, extra_options=extra_options or {})  # None default: no dict shared across calls
         # make sure transpose become xint8 operator, its input name could tell that
         check_op_nodes(self, model_uint8_path, lambda node: (node.name != "transpose_node" or node.input[0] != 'matmul_output'))
         qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Transpose': 1}
         check_op_type_count(self, model_uint8_path, **qnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]],
+                           'DequantizeLinear': [['i', 2, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)  # NOTE(review): entries look like [i/o flag, index, dtype] - confirm in op_test_utils
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
 
         # Verify QDQ model
         data_reader.rewind()
-        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
+        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
+                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options or {})
         qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Transpose': 1}
         check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
+        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
+        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
 
+    def test_quantize_transpose(self):  # baseline run: unsigned 8-bit activations and weights
+        self.quantize_transpose_test(activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
+
+    def test_quantize_transpose_s8s8(self):  # signed 8-bit activations and weights, ActivationSymmetric option on
+        self.quantize_transpose_test(activation_type=QuantType.QInt8, weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_qdq.py b/onnxruntime/test/python/quantization/test_qdq.py
index d8d4280e37..2aca1eacdb 100644
--- a/onnxruntime/test/python/quantization/test_qdq.py
+++ b/onnxruntime/test/python/quantization/test_qdq.py
@@ -104,7 +104,7 @@ class TestQDQExtraOptions(unittest.TestCase):
             # This QuantizeLinear node should be followed by Add1
             if node.name == 'P_QuantizeLinear':
                 qdq_added_to_node_output_flag = True
-                self.assertTrue(node.input[0] is 'P')
+                self.assertTrue(node.input[0] == 'P')
 
         self.assertTrue(qdq_added_to_node_output_flag)