mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-01 03:45:06 +00:00
Add quantization tool and its unittest with s8s8 support (#10007)
* Add quantization tool with s8s8 support * Add unit tests for operators that already support s8s8 * Comment out the ready unit tests for the upcoming s8s8 operators (ConvInteger and Resize) * Minor changes to the quantization tools * Use a different s8 min value depending on whether the tensor is a weight or an activation. * Use the same qmin for reduced-range s8.
This commit is contained in:
parent
7a70d22150
commit
b000ec91cc
15 changed files with 418 additions and 176 deletions
|
|
@ -12,7 +12,8 @@ class QLinearConcat(QuantOperatorBase):
|
|||
|
||||
data_found, output_scale_name, output_zp_name, _, _ = \
|
||||
self.quantizer._get_quantization_params(node.output[0])
|
||||
(q_input_names, zero_point_names, scale_names, nodes) = self.quantizer.quantize_inputs(node, [*range(0, len(node.input))])
|
||||
(q_input_names, zero_point_names, scale_names, nodes) = \
|
||||
self.quantizer.quantize_inputs(node, [*range(0, len(node.input))], initializer_use_weight_qType=False)
|
||||
if not data_found or q_input_names is None:
|
||||
return super().quantize()
|
||||
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False):
|
|||
if len(data):
|
||||
rmin = min(data)
|
||||
rmax = max(data)
|
||||
qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range)
|
||||
qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, for_weight=True)
|
||||
|
||||
zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric)
|
||||
|
||||
|
|
@ -184,7 +184,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False):
|
|||
|
||||
return rmin, rmax, zero_point, scale, quantized_data
|
||||
|
||||
def get_qmin_qmax_for_qType(qType, reduce_range=False):
|
||||
def get_qmin_qmax_for_qType(qType, reduce_range=False, for_weight=False):
|
||||
'''
|
||||
Return qmin and qmax, the minimum and maximum value representable by the given qType
|
||||
:parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.INT8
|
||||
|
|
@ -193,18 +193,21 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False):
|
|||
if qType == onnx_proto.TensorProto.UINT8:
|
||||
(qmin, qmax) = (0,127) if reduce_range else (0,255)
|
||||
elif qType == onnx_proto.TensorProto.INT8:
|
||||
(qmin, qmax) = (-64,64) if reduce_range else (-127,127)
|
||||
if for_weight:
|
||||
(qmin, qmax) = (-64,64) if reduce_range else (-127,127)
|
||||
else:
|
||||
(qmin, qmax) = (-64,64) if reduce_range else (-128,127)
|
||||
else:
|
||||
raise ValueError("Unexpected data type {} requested. Only INT8 and UINT8 are supported.".format(qType))
|
||||
return qmin, qmax
|
||||
|
||||
def get_qrange_for_qType(qType, reduce_range=False):
|
||||
def get_qrange_for_qType(qType, reduce_range=False, for_weight=False):
|
||||
'''
|
||||
Helper function to get the quantization range for a type.
|
||||
parameter qType: quantization type.
|
||||
return: quantization range.
|
||||
'''
|
||||
qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range)
|
||||
qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, for_weight=for_weight)
|
||||
return qmax - qmin
|
||||
|
||||
class QuantizedInitializer:
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ def InputFeedsNegOneZeroOne(n, name2shape):
|
|||
dr = TestDataFeeds(input_data_list)
|
||||
return dr
|
||||
|
||||
|
||||
def check_op_type_order(testcase, model_to_check, ops):
|
||||
if isinstance(model_to_check, string_types):
|
||||
model = onnx.load(model_to_check)
|
||||
|
|
@ -78,3 +77,28 @@ def check_op_nodes(testcase, model_path, node_checker):
|
|||
model = onnx.load(Path(model_path))
|
||||
for node in model.graph.node:
|
||||
testcase.assertTrue(node_checker(node))
|
||||
|
||||
def check_qtype_by_node_type(testcase, model_to_check, check_list):
    '''
    Assert that selected inputs/outputs of the nodes in a model have the expected element type.

    :parameter testcase: test case object providing the assert* methods used to report failures.
    :parameter model_to_check: path of an ONNX model file, or an already loaded onnx.ModelProto.
    :parameter check_list: dict mapping a node op_type to a list of check items. Each item is
        indexable as [kind, index, elem_type]: kind 'i' selects node.input[index], any other
        value selects node.output[index]; elem_type is the expected tensor element type.
    :raises TypeError: if model_to_check is neither a path string nor an onnx.ModelProto.
    '''
    if isinstance(model_to_check, string_types):
        model = onnx.load(model_to_check)
    elif isinstance(model_to_check, onnx.ModelProto):
        model = model_to_check
    else:
        # Without this branch `model` would be unbound and fail below with a confusing UnboundLocalError.
        raise TypeError("model_to_check must be a model path or an onnx.ModelProto, got {}".format(
            type(model_to_check).__name__))

    # Shape inference populates graph.value_info so intermediate tensors can be looked up by name.
    model = onnx.shape_inference.infer_shapes(model)

    # On duplicate names later updates win: graph inputs over graph outputs over inferred value_info.
    value_infos = {vi.name: vi for vi in model.graph.value_info}
    value_infos.update({ot.name: ot for ot in model.graph.output})
    value_infos.update({it.name: it for it in model.graph.input})
    initializers = {init.name: init for init in model.graph.initializer}

    for node in model.graph.node:
        if node.op_type not in check_list:
            continue
        for check_item in check_list[node.op_type]:
            io_kind, io_index, expected_type = check_item[0], check_item[1], check_item[2]
            tensor_name = node.input[io_index] if io_kind == 'i' else node.output[io_index]
            where = "{} node '{}', tensor '{}'".format(node.op_type, node.name, tensor_name)

            testcase.assertTrue((tensor_name in value_infos) or (tensor_name in initializers),
                                "{}: not found in value infos or initializers".format(where))
            if tensor_name in value_infos:
                vi = value_infos[tensor_name]
                testcase.assertTrue(vi.type.HasField('tensor_type'), "{}: not a tensor".format(where))
                testcase.assertEqual(vi.type.tensor_type.elem_type, expected_type,
                                     "{}: unexpected element type".format(where))
            else:
                # Not in value_infos, so the assertion above guarantees it is an initializer.
                init = initializers[tensor_name]
                testcase.assertEqual(init.data_type, expected_type,
                                     "{}: unexpected initializer data type".format(where))
|
||||
|
|
|
|||
|
|
@ -11,17 +11,18 @@ import onnx
|
|||
import onnxruntime
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto, numpy_helper
|
||||
from onnxruntime.quantization import quantize_dynamic
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
|
||||
from onnxruntime.quantization import quantize_dynamic, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order, check_qtype_by_node_type
|
||||
|
||||
|
||||
def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
|
||||
'''
|
||||
Helper function to generate initializers for test inputs
|
||||
'''
|
||||
tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
|
||||
init = numpy_helper.from_array(tensor, input_name)
|
||||
return init
|
||||
'''
|
||||
Helper function to generate initializers for test inputs
|
||||
'''
|
||||
tensor = np.random.normal(0, 0.3, tensor_shape).astype(tensor_dtype)
|
||||
init = numpy_helper.from_array(tensor, input_name)
|
||||
return init
|
||||
|
||||
|
||||
class TestONNXModel(unittest.TestCase):
|
||||
def construct_model(self, model_path):
|
||||
|
|
@ -52,19 +53,31 @@ class TestONNXModel(unittest.TestCase):
|
|||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
onnx.save(model, model_path)
|
||||
|
||||
def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
|
||||
quantize_dynamic(model_fp32_path, model_int8_path)
|
||||
quant_nodes = {'ConvInteger' : 2}
|
||||
def dynamic_quant_conv_test(self, activation_type, weight_type, extra_options={}):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'conv_bias.fp32.onnx'
|
||||
self.construct_model(model_fp32_path)
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_int8_path = 'conv_bias.quant.{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
quantize_dynamic(model_fp32_path, model_int8_path,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
quant_nodes = {'ConvInteger': 2}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
qnode_io_qtypes = {'ConvInteger': [['i', 2, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
|
||||
|
||||
def test_quant_conv(self):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'conv_bias.fp32.onnx'
|
||||
model_int8_path = 'conv_bias.quant.onnx'
|
||||
self.construct_model(model_fp32_path)
|
||||
self.dynamic_quant_conv_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
|
||||
|
||||
# TODO: uncomment the following once ConvInteger supports s8
|
||||
# def test_quant_conv_s8s8(self):
|
||||
# self.dynamic_quant_conv_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
self.dynamic_quant_conv(model_fp32_path, model_int8_path)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@
|
|||
import unittest
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto, numpy_helper, save
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import InputFeedsNegOneZeroOne, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import InputFeedsNegOneZeroOne, check_model_correctness, check_op_type_count, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestONNXModel(unittest.TestCase):
|
||||
|
|
@ -47,7 +47,7 @@ class TestONNXModel(unittest.TestCase):
|
|||
conv3_node = helper.make_node('Conv', ['input', 'conv3_weight'], ['conv3_output'], name='conv3_node')
|
||||
|
||||
concat_node = helper.make_node('Concat', ['conv1_output', 'conv2_output', 'conv3_output'], [
|
||||
'concat_output'], name='concat_node', axis=1)
|
||||
'concat_output'], name='concat_node', axis=1)
|
||||
|
||||
identity_node = helper.make_node('Identity', ['concat_output'], ['output'], name='identity_node')
|
||||
|
||||
|
|
@ -57,31 +57,48 @@ class TestONNXModel(unittest.TestCase):
|
|||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
save(model, model_path)
|
||||
|
||||
def test_quantize_concat(self):
|
||||
def quantize_concat_test(self, activation_type, weight_type, extra_options={}):
|
||||
np.random.seed(1)
|
||||
|
||||
model_fp32_path = 'concat_fp32.onnx'
|
||||
model_uint8_path = 'concat_uint8.onnx'
|
||||
model_uint8_qdq_path = 'concat_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model(model_fp32_path)
|
||||
data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_q8_path = 'concat_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_q8_qdq_path = 'concat_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_q8_path, data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
|
||||
qnode_counts = {'QLinearConv': 3, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'QLinearConcat': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
check_op_type_count(self, model_q8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'QLinearConcat': [['i', 1, activation_proto_qtype], [
|
||||
'i', 4, activation_proto_qtype], ['i', 7, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 5, 'DequantizeLinear': 8, 'Concat': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_concat(self):
|
||||
self.quantize_concat_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
|
||||
|
||||
def test_quantize_concat_s8s8(self):
|
||||
self.quantize_concat_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpGlobalAveragePool(unittest.TestCase):
|
||||
|
|
@ -44,10 +44,10 @@ class TestOpGlobalAveragePool(unittest.TestCase):
|
|||
output_name = 'output'
|
||||
initializers = []
|
||||
|
||||
#make 1st GlobalAveragePool node
|
||||
# make 1st GlobalAveragePool node
|
||||
gavgpool_node_1 = onnx.helper.make_node('GlobalAveragePool', [input_name], [expand_input])
|
||||
|
||||
#make Expand node
|
||||
# make Expand node
|
||||
expand_shape_name = 'expand_shape'
|
||||
initializers.append(onnx.numpy_helper.from_array(np.array(input_shape, dtype=np.int64), name=expand_shape_name))
|
||||
expand_node = onnx.helper.make_node('Expand', [expand_input, expand_shape_name], [conv_input])
|
||||
|
|
@ -59,7 +59,7 @@ class TestOpGlobalAveragePool(unittest.TestCase):
|
|||
initializers.append(onnx.numpy_helper.from_array(conv_weight_data, name=weight_name))
|
||||
conv_node = onnx.helper.make_node('Conv', [conv_input, weight_name], [gavgpool_input_2nd], name=conv_name)
|
||||
|
||||
#make 1st GlobalAveragePool node
|
||||
# make 2nd GlobalAveragePool node
|
||||
gavgpool_node_2 = onnx.helper.make_node('GlobalAveragePool', [gavgpool_input_2nd], [output_name])
|
||||
|
||||
# make graph
|
||||
|
|
@ -69,30 +69,42 @@ class TestOpGlobalAveragePool(unittest.TestCase):
|
|||
graph = helper.make_graph([gavgpool_node_1, expand_node, conv_node, gavgpool_node_2], graph_name,
|
||||
[input_tensor], [output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_reshape(self):
|
||||
def quantize_gavgpool_test(self, activation_type, weight_type, extra_options={}):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'gavg_pool_fp32.onnx'
|
||||
model_int8_path = 'gavg_pool_fp32.quant.onnx'
|
||||
data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
|
||||
self.construct_model_gavgpool(model_fp32_path,
|
||||
[1, 8, 33, 33],
|
||||
[16, 8, 3, 3],
|
||||
[1, 16, 1, 1])
|
||||
quantize_static(model_fp32_path,
|
||||
model_int8_path,
|
||||
data_reader)
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_q8_path = 'gavg_pool_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
data_reader.rewind()
|
||||
quant_nodes = {'QLinearConv' : 1,
|
||||
'GlobalAveragePool' : 1,
|
||||
'QLinearGlobalAveragePool' : 1,
|
||||
'QuantizeLinear' : 1,
|
||||
'DequantizeLinear' : 1}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
|
||||
quantize_static(model_fp32_path, model_q8_path, data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
|
||||
quant_nodes = {'QLinearConv': 1, 'GlobalAveragePool': 1, 'QLinearGlobalAveragePool': 1,
|
||||
'QuantizeLinear': 1, 'DequantizeLinear': 1}
|
||||
check_op_type_count(self, model_q8_path, **quant_nodes)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'QLinearGlobalAveragePool': [['i', 2, activation_proto_qtype], ['i', 4, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_gavgpool(self):
|
||||
self.quantize_gavgpool_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={})
|
||||
|
||||
def test_quantize_gavgpool_s8s8(self):
|
||||
self.quantize_gavgpool_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpGEMM(unittest.TestCase):
|
||||
|
|
@ -46,12 +46,12 @@ class TestOpGEMM(unittest.TestCase):
|
|||
bias_data = np.random.normal(0, 0.1, bias_shape).astype(np.float32)
|
||||
initializers.append(onnx.numpy_helper.from_array(bias_data, name=bias_name))
|
||||
|
||||
return onnx.helper.make_node('Gemm', [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB = 1)
|
||||
return onnx.helper.make_node('Gemm', [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1)
|
||||
# make gemm1 node
|
||||
gemm1_output_name = "gemm1_output"
|
||||
gemm1_node = make_gemm(input_name, [100, 10], 'linear1.weight', [100], 'linear1.bias', gemm1_output_name)
|
||||
|
||||
#make Clip
|
||||
# make Clip
|
||||
clip_min_name = 'clip_min'
|
||||
clip_max_name = 'clip_max'
|
||||
clip_output_name = 'clip_output'
|
||||
|
|
@ -71,7 +71,7 @@ class TestOpGEMM(unittest.TestCase):
|
|||
graph = helper.make_graph([gemm1_node, clip_node, gemm2_node], graph_name,
|
||||
[input_tensor], [output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
|
|
@ -121,55 +121,82 @@ class TestOpGEMM(unittest.TestCase):
|
|||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def static_quant_test(self, model_fp32_path, model_int8_path):
|
||||
data_reader = self.input_feeds(1, {'input': [5, 10]})
|
||||
quantize_static(model_fp32_path,
|
||||
model_int8_path,
|
||||
data_reader)
|
||||
def static_quant_test(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_int8_path = 'gemm_fp32.quant_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
data_reader.rewind()
|
||||
quant_nodes = {'QLinearMatMul' : 2,
|
||||
'QLinearAdd' : 2,
|
||||
'QuantizeLinear' : 1,
|
||||
'DequantizeLinear' : 1}
|
||||
quantize_static(model_fp32_path, model_int8_path, data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
quant_nodes = {'QLinearMatMul': 2, 'QLinearAdd': 2, 'QuantizeLinear': 1, 'DequantizeLinear': 1}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
|
||||
|
||||
def static_quant_test_qdq(self, model_fp32_path, model_int8_path):
|
||||
data_reader = self.input_feeds(1, {'input': [5, 10]})
|
||||
quantize_static(model_fp32_path,
|
||||
model_int8_path,
|
||||
data_reader,
|
||||
quant_format=QuantFormat.QDQ)
|
||||
def static_quant_test_qdq(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_int8_path = 'gemm_fp32.quant_dqd_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
data_reader.rewind()
|
||||
quant_nodes = {'MatMul' : 2,
|
||||
'Add' : 2,
|
||||
'QuantizeLinear' : 5,
|
||||
'DequantizeLinear' : 9}
|
||||
quantize_static(model_fp32_path, model_int8_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
quant_nodes = {'MatMul': 2, 'Add': 2, 'QuantizeLinear': 5, 'DequantizeLinear': 9}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
|
||||
|
||||
def dynamic_quant_test(self, model_fp32_path, data_reader, activation_type, weight_type, extra_options={}):
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_int8_path = 'gemm_fp32.quant_dynamic_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
def dynamic_quant_test(self, model_fp32_path, model_int8_path):
|
||||
quantize_dynamic(model_fp32_path, model_int8_path)
|
||||
quant_nodes = {'MatMulInteger' : 2}
|
||||
quantize_dynamic(model_fp32_path, model_int8_path,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
quant_nodes = {'MatMulInteger': 2}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(5,10).astype(np.float32)})
|
||||
qnode_io_qtypes = {'MatMulInteger': [['i', 2, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(5, 10).astype(np.float32)})
|
||||
|
||||
def dynamic_attention_quant_test(self, model_fp32_path, model_int8_path, per_channel, reduce_range):
|
||||
quantize_dynamic(model_fp32_path, model_int8_path, per_channel=per_channel, reduce_range=reduce_range)
|
||||
quant_nodes = {'QAttention' : 1, 'MatMulInteger' : 1}
|
||||
quant_nodes = {'QAttention': 1, 'MatMulInteger': 1}
|
||||
check_op_type_count(self, model_int8_path, **quant_nodes)
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(1,5,10).astype(np.float32)})
|
||||
check_model_correctness(self, model_fp32_path, model_int8_path, {'input': np.random.rand(1, 5, 10).astype(np.float32)})
|
||||
|
||||
def test_quantize_reshape(self):
|
||||
def test_quantize_gemm(self):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'gemm_fp32.onnx'
|
||||
model_int8_path = 'gemm_fp32.quant.onnx'
|
||||
self.construct_model_gemm(model_fp32_path)
|
||||
data_reader = self.input_feeds(1, {'input': [5, 10]})
|
||||
|
||||
self.static_quant_test(model_fp32_path, model_int8_path)
|
||||
self.static_quant_test_qdq(model_fp32_path, model_int8_path)
|
||||
self.dynamic_quant_test(model_fp32_path, model_int8_path)
|
||||
self.static_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
|
||||
self.static_quant_test_qdq(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
|
||||
self.dynamic_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8)
|
||||
|
||||
def test_quantize_gemm_s8s8(self):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'gemm_fp32.onnx'
|
||||
self.construct_model_gemm(model_fp32_path)
|
||||
data_reader = self.input_feeds(1, {'input': [5, 10]})
|
||||
|
||||
self.static_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
|
||||
extra_options={'ActivationSymmetric': True})
|
||||
self.static_quant_test_qdq(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
|
||||
extra_options={'ActivationSymmetric': True})
|
||||
self.dynamic_quant_test(model_fp32_path, data_reader, activation_type=QuantType.QInt8, weight_type=QuantType.QInt8,
|
||||
extra_options={'ActivationSymmetric': True})
|
||||
|
||||
def test_quantize_attention(self):
|
||||
np.random.seed(1)
|
||||
|
|
@ -182,5 +209,6 @@ class TestOpGEMM(unittest.TestCase):
|
|||
self.dynamic_attention_quant_test(model_fp32_path, model_int8_path, False, True)
|
||||
self.dynamic_attention_quant_test(model_fp32_path, model_int8_path, False, False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpMaxPool(unittest.TestCase):
|
||||
|
|
@ -54,40 +54,55 @@ class TestOpMaxPool(unittest.TestCase):
|
|||
graph = helper.make_graph([conv_node, identity_node, maxpool_node], 'TestOpQuantizerMaxPool_test_model',
|
||||
[input_tensor], [identity_out, output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 14)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_maxpool(self):
|
||||
def quantize_maxpool_test(self, activation_type, weight_type, extra_options={}):
|
||||
np.random.seed(1)
|
||||
|
||||
model_fp32_path = 'maxpool_fp32.onnx'
|
||||
model_uint8_path = 'maxpool_uint8.onnx'
|
||||
model_uint8_qdq_path = 'maxpool_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model_conv_maxpool(model_fp32_path,
|
||||
[1, 2, 26, 42], [3, 2, 3, 3],
|
||||
[1, 3, 24, 40], {'kernel_shape': [3, 3]},
|
||||
[1, 3, 22, 38])
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_q8_path = 'maxpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_q8_qdq_path = 'maxpool_dqd_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
|
||||
# make sure maxpool become xint8 operator, its input name could tell that
|
||||
check_op_nodes(self, model_uint8_path, lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
|
||||
qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'MaxPool': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
quantize_static(model_fp32_path, model_q8_path, data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
# make sure maxpool becomes an xint8 operator; its input name tells us that
|
||||
check_op_nodes(self, model_q8_path, lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
|
||||
qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'MaxPool': 1}
|
||||
check_op_type_count(self, model_q8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'MaxPool': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_maxpool(self):
|
||||
self.quantize_maxpool_test(QuantType.QUInt8, QuantType.QUInt8, extra_options={ })
|
||||
|
||||
def test_quantize_maxpool_s8s8(self):
|
||||
self.quantize_maxpool_test(QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpQuatizerPad(unittest.TestCase):
|
||||
|
|
@ -51,7 +51,7 @@ class TestOpQuatizerPad(unittest.TestCase):
|
|||
graph = helper.make_graph([pad_node], 'TestOpQuantizerPad_test_model',
|
||||
[input_tensor], [output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
|
|
@ -91,14 +91,17 @@ class TestOpQuatizerPad(unittest.TestCase):
|
|||
graph = helper.make_graph([conv_node, identity_node, pad_node], 'TestOpQuantizerPad_test_model',
|
||||
[input_tensor], [identity_out, output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def quantize_model(self, model_fp32_path, model_i8_path, data_reader=None):
|
||||
def quantize_model(self, model_fp32_path, model_i8_path, data_reader=None,
|
||||
activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options={}):
|
||||
if data_reader is not None:
|
||||
quantize_static(model_fp32_path, model_i8_path, data_reader, reduce_range=True)
|
||||
quantize_static(model_fp32_path, model_i8_path, data_reader, reduce_range=True,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
else:
|
||||
quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True)
|
||||
quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
|
||||
def verify_should_not_trigger(self, quantize_mode='static'):
|
||||
np.random.seed(108)
|
||||
|
|
@ -118,23 +121,39 @@ class TestOpQuatizerPad(unittest.TestCase):
|
|||
def test_dynamic_quantize_no_trigger(self):
|
||||
self.verify_should_not_trigger(quantize_mode='dynamic')
|
||||
|
||||
def verify_quantize_with_pad_mode(self, pad_mode, constant_value=None, quantize_mode='static'):
|
||||
def verify_quantize_with_pad_mode(self, pad_mode, constant_value=None, quantize_mode='static', rtol=0.01, atol=0.05,
|
||||
activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options={}):
|
||||
np.random.seed(108)
|
||||
tag_pad_mode = pad_mode if pad_mode is not None else 'none'
|
||||
tag_constant_value = '' if constant_value is None else '_value'
|
||||
model_fp32_path = 'qop_pad_{}_fp32_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value)
|
||||
model_i8_path = 'qop_pad_{}_i8_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value)
|
||||
data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
|
||||
self.construct_model_conv_pad(model_fp32_path, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31],
|
||||
pad_mode, [0, 0, 1, 2, 0, 0, 3, 4], constant_value=constant_value)
|
||||
self.quantize_model(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader)
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_i8_path = 'qop_pad_{}_i8_{}{}_{}{}.onnx'.format(
|
||||
quantize_mode, tag_pad_mode, tag_constant_value, activation_type_str, weight_type_str)
|
||||
data_reader.rewind()
|
||||
self.quantize_model(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
# DequantizeLinear=2 means there is one DequantizeLinear node after both conv and pad,
|
||||
# which means pad node is running in quantized semantic.
|
||||
# In dynamic quantize mode, the pad operator is in fact not quantized, as its input is fp32.
|
||||
kwargs = {'DynamicQuantizeLinear': 1} if quantize_mode != 'static' else {'DequantizeLinear': 2, 'QuantizeLinear': 1}
|
||||
if quantize_mode != 'static':
|
||||
kwargs = {'DynamicQuantizeLinear': 1} if activation_type == QuantType.QUInt8 else {'QuantizeLinear': 1}
|
||||
else:
|
||||
kwargs = {'DequantizeLinear': 2, 'QuantizeLinear': 1}
|
||||
check_op_type_count(self, model_i8_path, **kwargs)
|
||||
check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next())
|
||||
# check node input/output type if such node exists in the graph
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
|
||||
qnode_io_qtypes.update({'ConvInteger': [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next(), rtol=rtol, atol=atol)
|
||||
|
||||
def test_static_mode_edge(self):
|
||||
self.verify_quantize_with_pad_mode('edge', constant_value=None)
|
||||
|
|
@ -148,6 +167,22 @@ class TestOpQuatizerPad(unittest.TestCase):
|
|||
def test_static_mode_constant_value(self):
|
||||
self.verify_quantize_with_pad_mode('constant', constant_value=3.75)
|
||||
|
||||
def test_static_mode_edge_s8s8(self):
|
||||
self.verify_quantize_with_pad_mode('edge', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
|
||||
weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
def test_static_mode_reflect_s8s8(self):
|
||||
self.verify_quantize_with_pad_mode('reflect', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
|
||||
weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
def test_static_mode_constant_default_s8s8(self):
|
||||
self.verify_quantize_with_pad_mode('constant', constant_value=None, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
|
||||
weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
def test_static_mode_constant_value_s8s8(self):
|
||||
self.verify_quantize_with_pad_mode('constant', constant_value=3.75, rtol=0.1, atol=0.1, activation_type=QuantType.QInt8,
|
||||
weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
def test_dynamic_mode_edge(self):
|
||||
self.verify_quantize_with_pad_mode('edge', constant_value=None, quantize_mode='dynamic')
|
||||
|
||||
|
|
@ -160,6 +195,23 @@ class TestOpQuatizerPad(unittest.TestCase):
|
|||
def test_dynamic_mode_constant_value(self):
|
||||
self.verify_quantize_with_pad_mode('constant', constant_value=3.75, quantize_mode='dynamic')
|
||||
|
||||
# TODO: uncomment following after ConvInteger s8 supported
|
||||
# def test_dynamic_mode_edge_s8s8(self):
|
||||
# self.verify_quantize_with_pad_mode('edge', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
|
||||
# weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
# def test_dynamic_mode_reflect_s8s8(self):
|
||||
# self.verify_quantize_with_pad_mode('reflect', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
|
||||
# weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
# def test_dynamic_mode_constant_default_s8s8(self):
|
||||
# self.verify_quantize_with_pad_mode('constant', constant_value=None, quantize_mode='dynamic', activation_type=QuantType.QInt8,
|
||||
# weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
# def test_dynamic_mode_constant_value_s8s8(self):
|
||||
# self.verify_quantize_with_pad_mode('constant', constant_value=3.75, quantize_mode='dynamic', activation_type=QuantType.QInt8,
|
||||
# weight_type=QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpAveragePool(unittest.TestCase):
|
||||
|
|
@ -57,34 +57,50 @@ class TestOpAveragePool(unittest.TestCase):
|
|||
model.ir_version = 7 # use stable onnx ir version
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_avgpool(self):
|
||||
def quantize_avgpool_test(self, activation_type, weight_type, extra_options = {}):
|
||||
np.random.seed(1)
|
||||
|
||||
model_fp32_path = 'avgpool_fp32.onnx'
|
||||
model_uint8_path = 'avgpool_uint8.onnx'
|
||||
model_uint8_qdq_path = 'avgpool_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model_conv_avgpool(model_fp32_path,
|
||||
[1, 2, 26, 42], [3, 2, 3, 3],
|
||||
[1, 3, 24, 40], {'kernel_shape': [3, 3]},
|
||||
[1, 3, 22, 38])
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_q8_path = 'avgpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_q8_qdq_path = 'avgpool_qdq_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'QLinearAveragePool': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
quantize_static(model_fp32_path, model_q8_path, data_reader,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'QLinearAveragePool': 1}
|
||||
check_op_type_count(self, model_q8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'QLinearConv' : [['i', 2, activation_proto_qtype], ['i', 7, activation_proto_qtype], ['o', 0, activation_proto_qtype]]})
|
||||
qnode_io_qtypes.update({'QLinearAveragePool' : [['i', 4, activation_proto_qtype]]}) # shape info not working on custom ops
|
||||
check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'AveragePool': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_avgpool(self):
|
||||
self.quantize_avgpool_test(QuantType.QUInt8, QuantType.QUInt8)
|
||||
|
||||
def test_quantize_avgpool_s8s8(self):
|
||||
self.quantize_avgpool_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpReshape(unittest.TestCase):
|
||||
|
|
@ -67,35 +67,51 @@ class TestOpReshape(unittest.TestCase):
|
|||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_reshape(self):
|
||||
def quantize_reshape_test(self, activation_type, weight_type, extra_options = {}):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'reshape_fp32.onnx'
|
||||
model_uint8_path = 'reshape_uint8.onnx'
|
||||
model_uint8_qdq_path = 'reshape_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model_matmul_reshape(model_fp32_path,
|
||||
[3, 7],
|
||||
[7, 3],
|
||||
[1, 9])
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_uint8_path = 'reshape_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_uint8_qdq_path = 'reshape_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [3, 7]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
# make sure reshape becomes an xint8 operator; its input name tells us that
|
||||
check_op_nodes(self, model_uint8_path, lambda node: (node.name != "reshape_node" or node.input[0] != 'matmul_output'))
|
||||
qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Reshape': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear' : [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Reshape': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_reshape(self):
|
||||
self.quantize_reshape_test(QuantType.QUInt8, QuantType.QUInt8)
|
||||
|
||||
def test_quantize_reshape_s8s8(self):
|
||||
self.quantize_reshape_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpResize(unittest.TestCase):
|
||||
|
|
@ -81,12 +81,9 @@ class TestOpResize(unittest.TestCase):
|
|||
model.ir_version = 7 # use stable onnx ir version
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_resize(self):
|
||||
def quantize_resize_test(self, activation_type, weight_type, extra_options = {}):
|
||||
np.random.seed(1)
|
||||
|
||||
model_fp32_path = 'resize_fp32.onnx'
|
||||
model_uint8_path = 'resize_uint8.onnx'
|
||||
model_uint8_qdq_path = 'resize_uint8_qdq.onnx'
|
||||
|
||||
kwargs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'}
|
||||
self.construct_model_conv_resize(model_fp32_path,
|
||||
|
|
@ -95,25 +92,43 @@ class TestOpResize(unittest.TestCase):
|
|||
kwargs,
|
||||
[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0], None)
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_uint8_path = 'resize_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_uint8_qdq_path = 'resize_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
# make sure resize become xint8 operator, its input name could tell that
|
||||
check_op_nodes(self, model_uint8_path, lambda node: (node.name != "resize_node" or node.input[0] != 'conv_output'))
|
||||
qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'Resize': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear' : [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Resize': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_resize(self):
|
||||
self.quantize_resize_test(QuantType.QUInt8, QuantType.QUInt8)
|
||||
|
||||
# TODO: Uncomment following after resize s8 support is enabled
|
||||
# def test_quantize_resize_s8s8(self):
|
||||
# self.quantize_resize_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
||||
|
|
@ -26,8 +26,8 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
|||
return dr
|
||||
|
||||
def construct_model_conv_squeezes(self, output_model_path,
|
||||
conv_input_shape, conv_weight_shape, conv_output_shape,
|
||||
opset = 13):
|
||||
conv_input_shape, conv_weight_shape, conv_output_shape,
|
||||
opset=13):
|
||||
# (input)
|
||||
# / | \
|
||||
# Conv1 conv2 conv3
|
||||
|
|
@ -55,7 +55,6 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
|||
conv3_weight_initializer = onnx.numpy_helper.from_array(conv3_weight_arr, name='conv3_weight')
|
||||
conv3_node = onnx.helper.make_node('Conv', ['input', 'conv3_weight'], ['conv3_output'], name='conv3_node')
|
||||
|
||||
|
||||
if (opset >= 13):
|
||||
squeeze_axes_initializer = onnx.numpy_helper.from_array(np.array([0], dtype=np.int64), name='squeeze_axes')
|
||||
squeeze1_node = helper.make_node('Squeeze', ['conv1_output', 'squeeze_axes'], ['squeeze1_output'], name='suqeeze1_node')
|
||||
|
|
@ -66,9 +65,10 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
|||
|
||||
add1_node = helper.make_node('Add', ['squeeze1_output', 'squeeze2_output'], ['add1_output'], name='add1_node')
|
||||
if (opset >= 13):
|
||||
unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output', 'squeeze_axes'], ['unsqueeze_output'], name = 'unsqueeze_node')
|
||||
unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output', 'squeeze_axes'], [
|
||||
'unsqueeze_output'], name='unsqueeze_node')
|
||||
else:
|
||||
unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output'], ['unsqueeze_output'], name = 'unsqueeze_node', axes=[0])
|
||||
unsqueeze_node = helper.make_node('Unsqueeze', ['add1_output'], ['unsqueeze_output'], name='unsqueeze_node', axes=[0])
|
||||
|
||||
output_tensor = helper.make_tensor_value_info('output', TensorProto.FLOAT, conv_output_shape)
|
||||
add2_node = helper.make_node('Add', ['unsqueeze_output', 'conv3_output'], ['output'], name='add2_node')
|
||||
|
|
@ -79,33 +79,43 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
|||
graph = helper.make_graph([conv1_node, conv2_node, conv3_node, squeeze1_node, squeeze2_node, add1_node, unsqueeze_node, add2_node],
|
||||
'TestOpSuqeezes_test_model', [input_tensor], [output_tensor], initializer=initializers)
|
||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", opset)])
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
model.ir_version = 7 # use stable onnx ir version
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def run_quantize_squeezes_of_opset(self, opset = 13):
|
||||
def run_quantize_squeezes_of_opset(self, opset=13, activation_type=QuantType.QUInt8, weight_type=QuantType.QUInt8, extra_options={}):
|
||||
np.random.seed(1)
|
||||
|
||||
model_fp32_path = 'squeezes_opset{}_fp32.onnx'.format(opset)
|
||||
model_uint8_path = 'squeezes_opset{}_uint8.onnx'.format(opset)
|
||||
model_uint8_qdq_path = 'squeezes_opset{}_uint8_qdq.onnx'.format(opset)
|
||||
|
||||
self.construct_model_conv_squeezes(model_fp32_path, [1, 2, 26, 42], [3, 2, 3, 3], [1, 3, 24, 40], opset=opset)
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_uint8_path = 'squeezes_opset{}_{}{}.onnx'.format(opset, activation_type_str, weight_type_str)
|
||||
model_uint8_qdq_path = 'squeezes_opset{}_{}{}_qdq.onnx'.format(opset, activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
|
||||
# make sure squeezes become xint8 operator, its input name could tell that
|
||||
qnode_counts = {'QuantizeLinear': 1, 'DequantizeLinear': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next(), rtol=0.01, atol=0.5)
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
|
||||
qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 9, 'DequantizeLinear': 12}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next(), rtol=0.01, atol=0.5)
|
||||
|
||||
|
|
@ -113,5 +123,10 @@ class TestOpSqueezeUnsqueeze(unittest.TestCase):
|
|||
self.run_quantize_squeezes_of_opset(11)
|
||||
self.run_quantize_squeezes_of_opset(13)
|
||||
|
||||
def test_quantize_squeeze_unsqueeze_s8s8(self):
|
||||
self.run_quantize_squeezes_of_opset(11, QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
self.run_quantize_squeezes_of_opset(13, QuantType.QInt8, QuantType.QInt8, extra_options={'ActivationSymmetric': True})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat, QuantType
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes, check_qtype_by_node_type
|
||||
|
||||
|
||||
class TestOpTranspose(unittest.TestCase):
|
||||
|
|
@ -62,32 +62,47 @@ class TestOpTranspose(unittest.TestCase):
|
|||
|
||||
onnx.save(model, output_model_path)
|
||||
|
||||
def test_quantize_transpose(self):
|
||||
def quantize_transpose_test(self, activation_type, weight_type, extra_options = {}):
|
||||
np.random.seed(1)
|
||||
model_fp32_path = 'transpose_fp32.onnx'
|
||||
model_uint8_path = 'transpose_uint8.onnx'
|
||||
model_uint8_qdq_path = 'transpose_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model_matmul_transpose(model_fp32_path, [3, 7], [7, 5], [5, 3])
|
||||
|
||||
activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
|
||||
activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
|
||||
weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
|
||||
model_uint8_path = 'transpose_{}{}.onnx'.format(activation_type_str, weight_type_str)
|
||||
model_uint8_qdq_path = 'transpose_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)
|
||||
|
||||
# Verify QOperator model
|
||||
data_reader = self.input_feeds(1, {'input': [3, 7]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
# make sure transpose become xint8 operator, its input name could tell that
|
||||
check_op_nodes(self, model_uint8_path, lambda node: (node.name != "transpose_node" or node.input[0] != 'matmul_output'))
|
||||
qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Transpose': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
qnode_io_qtypes.update({'DequantizeLinear' : [['i', 2, activation_proto_qtype]]})
|
||||
check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ model
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
|
||||
activation_type = activation_type, weight_type = weight_type, extra_options = extra_options)
|
||||
qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Transpose': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
qnode_io_qtypes = {'QuantizeLinear' : [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
|
||||
check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
def test_quantize_transpose(self):
|
||||
self.quantize_transpose_test(QuantType.QUInt8, QuantType.QUInt8)
|
||||
|
||||
def test_quantize_transpose_s8s8(self):
|
||||
self.quantize_transpose_test(QuantType.QInt8, QuantType.QInt8, extra_options = {'ActivationSymmetric' : True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class TestQDQExtraOptions(unittest.TestCase):
|
|||
# This QuantizeLinear node should be followed by Add1
|
||||
if node.name == 'P_QuantizeLinear':
|
||||
qdq_added_to_node_output_flag = True
|
||||
self.assertTrue(node.input[0] is 'P')
|
||||
self.assertTrue(node.input[0] == 'P')
|
||||
|
||||
self.assertTrue(qdq_added_to_node_output_flag)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue