mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-25 02:50:42 +00:00
Support transpose by merge Reshape etc into direct xint8 operators. (#7265)
* Support transpose by merging Reshape etc. into direct xint8 operators. * Add resize operator quantization support. * Add QDQ tests for resize, reshape, maxpool, transpose.
This commit is contained in:
parent
42051c912a
commit
a4fdb4dbd9
9 changed files with 419 additions and 49 deletions
|
|
@ -1,31 +1,37 @@
|
|||
import onnx
|
||||
from .base_operator import QuantOperatorBase
|
||||
from ..quant_utils import QuantizedValue, QuantizedValueType
|
||||
from onnx import onnx_pb as onnx_proto
|
||||
from .qdq_base_operator import QDQOperatorBase
|
||||
from ..quant_utils import QuantizedValue
|
||||
|
||||
|
||||
class ReshapeQuant(QuantOperatorBase):
|
||||
# For operators that support 8bits operations directly, and output could
|
||||
# reuse input[0]'s type, zero point and scale; for example, Transpose, Reshape, etc.
|
||||
class Direct8BitOp(QuantOperatorBase):
|
||||
def __init__(self, onnx_quantizer, onnx_node):
|
||||
super().__init__(onnx_quantizer, onnx_node)
|
||||
|
||||
def quantize(self):
|
||||
node = self.node
|
||||
assert (node.op_type == "Reshape")
|
||||
|
||||
# If input to this node is not quantized then keep this node
|
||||
# Quantize when input[0] is quantized already. Otherwise keep it.
|
||||
if node.input[0] not in self.quantizer.quantized_value_map:
|
||||
self.quantizer.new_nodes += [node]
|
||||
return
|
||||
|
||||
# Reshape is a no-op in terms of quantization
|
||||
# Create an entry for output quantized value
|
||||
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
|
||||
quantized_output_value = QuantizedValue(node.output[0], node.output[0] + "_quantized",
|
||||
quantized_input_value.scale_name, quantized_input_value.zp_name,
|
||||
QuantizedValueType.Input)
|
||||
# Create an entry for output quantized value
|
||||
quantized_input_value.value_type)
|
||||
self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
|
||||
|
||||
node.input[0] = quantized_input_value.q_name
|
||||
node.output[0] = quantized_output_value.q_name
|
||||
self.quantizer.new_nodes += [node]
|
||||
|
||||
|
||||
class QDQDirect8BitOp(QDQOperatorBase):
    """QDQ-format handler that only asks the quantizer to quantize input[0].

    The handler keeps references to the quantizer and to the node it was
    created for; ``quantize`` forwards the node's first input name to
    ``quantizer.quantize_tensor``.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        # NOTE(review): the base-class initializer is intentionally not
        # invoked here, matching the existing behavior.
        self.node = onnx_node
        self.quantizer = onnx_quantizer

    def quantize(self):
        """Request quantization of the node's first input tensor."""
        first_input_name = self.node.input[0]
        self.quantizer.quantize_tensor(first_input_name)
|
||||
|
|
@ -1,10 +1,7 @@
|
|||
import onnx
|
||||
from .base_operator import QuantOperatorBase
|
||||
from ..quant_utils import QuantizedValue, QuantizedValueType
|
||||
from onnx import onnx_pb as onnx_proto
|
||||
from .direct_q8 import Direct8BitOp, QDQDirect8BitOp
|
||||
|
||||
|
||||
class QMaxPool(QuantOperatorBase):
|
||||
class QMaxPool(Direct8BitOp):
|
||||
def __init__(self, onnx_quantizer, onnx_node):
|
||||
super().__init__(onnx_quantizer, onnx_node)
|
||||
|
||||
|
|
@ -12,24 +9,26 @@ class QMaxPool(QuantOperatorBase):
|
|||
node = self.node
|
||||
assert (node.op_type == "MaxPool")
|
||||
|
||||
# if version is less than 12, go to normal quantize.
|
||||
if self.quantizer.opset_version < 12:
|
||||
super().quantize()
|
||||
super(Direct8BitOp, self).quantize()
|
||||
return
|
||||
|
||||
# When mode is QLinearOps, the output quantization params are calculated based on outputs from
|
||||
# activation nodes, therefore these nodes can be removed from the graph if they follow a quantized op.
|
||||
# If input to this node is not quantized then keep this node
|
||||
if node.input[0] not in self.quantizer.quantized_value_map:
|
||||
self.quantizer.new_nodes += [node]
|
||||
# Direct 8bits op
|
||||
return super().quantize()
|
||||
|
||||
|
||||
class QDQMaxPool(QDQDirect8BitOp):
|
||||
def __init__(self, onnx_quantizer, onnx_node):
|
||||
super().__init__(onnx_quantizer, onnx_node)
|
||||
|
||||
def quantize(self):
|
||||
node = self.node
|
||||
assert (node.op_type == "MaxPool")
|
||||
|
||||
# if the opset version is less than 12, leave the node unchanged
|
||||
if self.quantizer.opset_version < 12:
|
||||
return
|
||||
|
||||
# Create an entry for output quantized value
|
||||
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
|
||||
quantized_output_value = QuantizedValue(node.output[0], node.output[0] + "_quantized",
|
||||
quantized_input_value.scale_name, quantized_input_value.zp_name,
|
||||
QuantizedValueType.Input)
|
||||
self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
|
||||
|
||||
node.input[0] = quantized_input_value.q_name
|
||||
node.output[0] = quantized_output_value.q_name
|
||||
self.quantizer.new_nodes += [node]
|
||||
# Direct 8bits op
|
||||
return super().quantize()
|
||||
|
|
|
|||
34
onnxruntime/python/tools/quantization/operators/resize.py
Normal file
34
onnxruntime/python/tools/quantization/operators/resize.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
from .direct_q8 import Direct8BitOp, QDQDirect8BitOp
|
||||
|
||||
|
||||
class QResize(Direct8BitOp):
    """QOperator-format handler for ``Resize`` nodes.

    For opset 11 and above the node is handled as a direct 8-bit operator
    (``Direct8BitOp.quantize``). For older opsets the ``quantize``
    implementation of the class after ``Direct8BitOp`` in the MRO is used.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        """Quantize the Resize node according to the model's opset version."""
        assert self.node.op_type == "Resize"

        if self.quantizer.opset_version >= 11:
            # Direct 8-bit path.
            return super().quantize()

        # Opset < 11: skip Direct8BitOp and fall back to the quantize()
        # of the next class in the MRO.
        super(Direct8BitOp, self).quantize()
|
||||
|
||||
|
||||
class QDQResize(QDQDirect8BitOp):
    """QDQ-format handler for ``Resize`` nodes.

    Nothing is done for opsets below 11; otherwise the work is delegated to
    ``QDQDirect8BitOp.quantize``.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        """Delegate to the direct 8-bit QDQ handling when opset >= 11."""
        assert self.node.op_type == "Resize"

        if self.quantizer.opset_version >= 11:
            # Direct 8-bit path.
            return super().quantize()

        # Opset < 11: leave the node as it is.
        return None
|
||||
|
|
@ -8,16 +8,18 @@ from .operators.gather import GatherQuant
|
|||
from .operators.conv import QLinearConv, ConvInteger, QDQConv
|
||||
from .operators.activation import QLinearActivation, QDQRemovableActivation
|
||||
from .operators.binary_op import QLinearBinaryOp
|
||||
from .operators.maxpool import QMaxPool
|
||||
from .operators.maxpool import QDQMaxPool, QMaxPool
|
||||
from .operators.gavgpool import QGlobalAveragePool
|
||||
from .operators.lstm import LSTMQuant
|
||||
from .operators.split import QSplit
|
||||
from .operators.pad import QPad
|
||||
from .operators.reshape import ReshapeQuant
|
||||
from .operators.direct_q8 import Direct8BitOp, QDQDirect8BitOp
|
||||
from .operators.resize import QResize, QDQResize
|
||||
|
||||
CommonOpsRegistry = {"Gather": GatherQuant,
|
||||
"EmbedLayerNormalization": EmbedLayerNormalizationQuant,
|
||||
"Reshape": ReshapeQuant}
|
||||
CommonOpsRegistry = {
|
||||
"Gather": GatherQuant,
|
||||
"EmbedLayerNormalization": EmbedLayerNormalizationQuant,
|
||||
}
|
||||
|
||||
IntegerOpsRegistry = {
|
||||
"Conv": ConvInteger,
|
||||
|
|
@ -40,6 +42,9 @@ QLinearOpsRegistry = {
|
|||
"GlobalAveragePool": QGlobalAveragePool,
|
||||
"Split": QSplit,
|
||||
"Pad": QPad,
|
||||
"Reshape": Direct8BitOp,
|
||||
"Transpose" : Direct8BitOp,
|
||||
"Resize": QResize,
|
||||
}
|
||||
QLinearOpsRegistry.update(CommonOpsRegistry)
|
||||
|
||||
|
|
@ -47,6 +52,10 @@ QDQRegistry = {
|
|||
"Conv": QDQConv,
|
||||
"Clip": QDQRemovableActivation,
|
||||
"Relu": QDQRemovableActivation,
|
||||
"Reshape": QDQDirect8BitOp,
|
||||
"Transpose" : QDQDirect8BitOp,
|
||||
"Resize": QDQResize,
|
||||
"MaxPool": QDQMaxPool,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,3 +57,8 @@ def check_model_correctness(testcase, model_path_origin, model_path_to_check, in
|
|||
for idx, ref_output in enumerate(origin_results):
|
||||
output = target_results[idx]
|
||||
np.testing.assert_allclose(ref_output, output, rtol=rtol, atol=atol)
|
||||
|
||||
def check_op_nodes(testcase, model_path, node_checker):
    """Assert that ``node_checker`` is truthy for every node of a saved model.

    Args:
        testcase: object providing ``assertTrue`` (e.g. a ``unittest.TestCase``).
        model_path: path of the ONNX model file to load.
        node_checker: callable taking a graph node and returning a truthy
            value when the node is acceptable.
    """
    loaded_graph = onnx.load(Path(model_path)).graph
    for graph_node in loaded_graph.node:
        testcase.assertTrue(node_checker(graph_node))
|
||||
|
|
|
|||
93
onnxruntime/test/python/quantization/test_op_maxpool.py
Normal file
93
onnxruntime/test/python/quantization/test_op_maxpool.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import unittest
|
||||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
|
||||
|
||||
class TestOpMaxPool(unittest.TestCase):
    """Quantization test for MaxPool in QOperator and QDQ formats."""

    def input_feeds(self, n, name2shape):
        """Build a ``TestDataFeeds`` reader holding ``n`` random feed dicts.

        Each feed maps every name in ``name2shape`` to a float32 array of the
        given shape, filled with integers drawn from [-1, 2).
        """
        feeds = [
            {
                name: np.random.randint(-1, 2, shape).astype(np.float32)
                for name, shape in name2shape.items()
            }
            for _ in range(n)
        ]
        return TestDataFeeds(feeds)

    def construct_model_conv_maxpool(self, output_model_path,
                                     conv_input_shape, conv_weight_shape,
                                     maxpool_input_shape, maxpool_attributes,
                                     output_shape,
                                     ):
        """Save an opset-12 model with the following topology::

                  (input)
                     |
                    Conv
                   /    \\
              Identity   MaxPool
                 |          |
          (identity_out) (output)

        ``maxpool_attributes`` is forwarded as node attributes of the MaxPool
        node; the model is written to ``output_model_path``.
        """
        # Conv with a random integer-valued float32 weight.
        weight_values = np.random.randint(-1, 2, conv_weight_shape).astype(np.float32)
        initializers = [onnx.numpy_helper.from_array(weight_values, name='conv1_weight')]
        conv_node = onnx.helper.make_node('Conv', ['input', 'conv1_weight'], ['conv_output'], name='conv_node')

        # Both consumers read the Conv output.
        identity_node = helper.make_node('Identity', ['conv_output'], ['identity_out'], name='IdentityNode')
        maxpool_node = helper.make_node('MaxPool', ['conv_output'], ['output'], name='maxpool_node',
                                        **maxpool_attributes)

        # Graph inputs and outputs.
        graph_inputs = [helper.make_tensor_value_info('input', TensorProto.FLOAT, conv_input_shape)]
        graph_outputs = [
            helper.make_tensor_value_info('identity_out', TensorProto.FLOAT, maxpool_input_shape),
            helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape),
        ]

        graph = helper.make_graph([conv_node, identity_node, maxpool_node],
                                  'TestOpQuantizerMaxPool_test_model',
                                  graph_inputs, graph_outputs, initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 12)])
        model.ir_version = onnx.IR_VERSION
        onnx.save(model, output_model_path)

    def test_quantize_maxpool(self):
        """Quantize a Conv+MaxPool model statically and check both formats."""
        np.random.seed(1)

        model_fp32_path = 'maxpool_fp32.onnx'
        model_uint8_path = 'maxpool_uint8.onnx'
        model_uint8_qdq_path = 'maxpool_uint8_qdq.onnx'

        self.construct_model_conv_maxpool(
            model_fp32_path,
            conv_input_shape=[1, 2, 26, 42],
            conv_weight_shape=[3, 2, 3, 3],
            maxpool_input_shape=[1, 3, 24, 40],
            maxpool_attributes={'kernel_shape': [3, 3]},
            output_shape=[1, 3, 22, 38],
        )

        def maxpool_input_was_renamed(node):
            # Passes for every node except a MaxPool that still reads the
            # original float tensor 'conv_output'.
            return node.name != "maxpool_node" or node.input[0] != 'conv_output'

        # --- QOperator mode ---
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # MaxPool must have become an xint8 operator; its input name tells us.
        check_op_nodes(self, model_uint8_path, maxpool_input_was_renamed)
        check_op_type_count(self, model_uint8_path,
                            QLinearConv=1, QuantizeLinear=1, DequantizeLinear=2, MaxPool=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # --- QDQ mode ---
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        check_op_type_count(self, model_uint8_qdq_path,
                            Conv=1, QuantizeLinear=2, DequantizeLinear=3, MaxPool=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -10,8 +10,9 @@ import unittest
|
|||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
|
||||
|
||||
class TestOpReshape(unittest.TestCase):
|
||||
def input_feeds(self, n, name2shape):
|
||||
|
|
@ -55,7 +56,6 @@ class TestOpReshape(unittest.TestCase):
|
|||
initializers.append(onnx.numpy_helper.from_array(np.array(output_shape, dtype=np.int64), name=reshape_shape))
|
||||
reshape_node = onnx.helper.make_node('Reshape', reshape_inputs, reshape_output, name=reshape_name)
|
||||
|
||||
|
||||
# make graph
|
||||
input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape)
|
||||
output_tensor = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, output_shape)
|
||||
|
|
@ -71,19 +71,31 @@ class TestOpReshape(unittest.TestCase):
|
|||
np.random.seed(1)
|
||||
model_fp32_path = 'reshape_fp32.onnx'
|
||||
model_uint8_path = 'reshape_uint8.onnx'
|
||||
data_reader = self.input_feeds(1, {'input': [3, 7]})
|
||||
model_uint8_qdq_path = 'reshape_uint8_qdq.onnx'
|
||||
|
||||
self.construct_model_matmul_reshape(model_fp32_path,
|
||||
[3, 7],
|
||||
[7, 3],
|
||||
[1, 9])
|
||||
quantize_static(model_fp32_path,
|
||||
model_uint8_path,
|
||||
data_reader
|
||||
)
|
||||
[3, 7],
|
||||
[7, 3],
|
||||
[1, 9])
|
||||
|
||||
# Verify QOperator mode
|
||||
data_reader = self.input_feeds(1, {'input': [3, 7]})
|
||||
quantize_static(model_fp32_path, model_uint8_path, data_reader)
|
||||
# make sure reshape becomes an xint8 operator; its input name tells us that
|
||||
check_op_nodes(self, model_uint8_path, lambda node: (node.name != "reshape_node" or node.input[0] != 'matmul_output'))
|
||||
qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Reshape': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qnode_counts)
|
||||
data_reader.rewind()
|
||||
qdq_nodes = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Reshape': 1}
|
||||
check_op_type_count(self, model_uint8_path, **qdq_nodes)
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())
|
||||
|
||||
# Verify QDQ mode
|
||||
data_reader.rewind()
|
||||
quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
|
||||
qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 2, 'DequantizeLinear': 3, 'Reshape': 1}
|
||||
check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
|
||||
data_reader.rewind()
|
||||
check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
119
onnxruntime/test/python/quantization/test_op_resize.py
Normal file
119
onnxruntime/test/python/quantization/test_op_resize.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import unittest
|
||||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
|
||||
|
||||
class TestOpResize(unittest.TestCase):
    """Quantization test for Resize in QOperator and QDQ formats."""

    def input_feeds(self, n, name2shape):
        """Build a ``TestDataFeeds`` reader holding ``n`` random feed dicts.

        Each feed maps every name in ``name2shape`` to a float32 array of the
        given shape, filled with integers drawn from [-1, 2).
        """
        feeds = [
            {
                name: np.random.randint(-1, 2, shape).astype(np.float32)
                for name, shape in name2shape.items()
            }
            for _ in range(n)
        ]
        return TestDataFeeds(feeds)

    def construct_model_conv_resize(self, output_model_path,
                                    conv_input_shape, conv_weight_shape,
                                    resize_input_shape, resize_output_shape,
                                    resize_attrs,
                                    resize_roi, resize_scales, resize_sizes):
        """Save an opset-13 model with the following topology::

                  (input)
                     |
                    Conv
                   /    \\
              Identity   Resize
                 |          |
          (identity_out) (output)

        ``resize_attrs`` is forwarded as node attributes of the Resize node.
        ``resize_roi``, ``resize_scales`` and ``resize_sizes`` may each be
        ``None``; a provided value becomes an initializer wired into the
        Resize node. A missing roi or scales is replaced by an empty input
        name, whereas a missing sizes adds no input at all.
        """
        # Conv with a random integer-valued float32 weight.
        weight_values = np.random.randint(-1, 2, conv_weight_shape).astype(np.float32)
        initializers = [onnx.numpy_helper.from_array(weight_values, name='conv1_weight')]
        conv_node = onnx.helper.make_node('Conv', ['input', 'conv1_weight'], ['conv_output'], name='conv_node')

        identity_node = helper.make_node('Identity', ['conv_output'], ['identity_out'], name='IdentityNode')

        # Resize starts with its data input only; the remaining inputs are
        # appended below in the order roi, scales, sizes.
        resize_node = helper.make_node('Resize', ['conv_output'], ['output'], name='resize_node', **resize_attrs)

        # (initializer name, element type, values, keep an empty slot when absent)
        trailing_inputs = (
            ('resize_roi', TensorProto.FLOAT, resize_roi, True),
            ('resize_scales', TensorProto.FLOAT, resize_scales, True),
            ('resize_sizes', TensorProto.INT64, resize_sizes, False),
        )
        for tensor_name, elem_type, values, placeholder_when_absent in trailing_inputs:
            if values is not None:
                initializers.append(helper.make_tensor(tensor_name, elem_type, [len(values)], values))
                resize_node.input.extend([tensor_name])
            elif placeholder_when_absent:
                resize_node.input.extend([''])

        # Graph inputs and outputs.
        graph_inputs = [helper.make_tensor_value_info('input', TensorProto.FLOAT, conv_input_shape)]
        graph_outputs = [
            helper.make_tensor_value_info('identity_out', TensorProto.FLOAT, resize_input_shape),
            helper.make_tensor_value_info('output', TensorProto.FLOAT, resize_output_shape),
        ]

        graph = helper.make_graph([conv_node, identity_node, resize_node],
                                  'TestOpQuantizerResize_test_model',
                                  graph_inputs, graph_outputs, initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
        model.ir_version = onnx.IR_VERSION
        onnx.save(model, output_model_path)

    def test_quantize_resize(self):
        """Quantize a Conv+Resize model statically and check both formats."""
        np.random.seed(1)

        model_fp32_path = 'resize_fp32.onnx'
        model_uint8_path = 'resize_uint8.onnx'
        model_uint8_qdq_path = 'resize_uint8_qdq.onnx'

        kwargs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'}
        self.construct_model_conv_resize(
            model_fp32_path,
            conv_input_shape=[1, 2, 26, 42],
            conv_weight_shape=[3, 2, 3, 3],
            resize_input_shape=[1, 3, 24, 40],
            resize_output_shape=[1, 3, 48, 80],
            resize_attrs=kwargs,
            resize_roi=[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
            resize_scales=[1.0, 1.0, 2.0, 2.0],
            resize_sizes=None,
        )

        def resize_input_was_renamed(node):
            # Passes for every node except a Resize that still reads the
            # original float tensor 'conv_output'.
            return node.name != "resize_node" or node.input[0] != 'conv_output'

        # --- QOperator mode ---
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # Resize must have become an xint8 operator; its input name tells us.
        check_op_nodes(self, model_uint8_path, resize_input_was_renamed)
        check_op_type_count(self, model_uint8_path,
                            QLinearConv=1, QuantizeLinear=1, DequantizeLinear=2, Resize=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # --- QDQ mode ---
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        check_op_type_count(self, model_uint8_qdq_path,
                            Conv=1, QuantizeLinear=2, DequantizeLinear=3, Resize=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
93
onnxruntime/test/python/quantization/test_op_transpose.py
Normal file
93
onnxruntime/test/python/quantization/test_op_transpose.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import unittest
|
||||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_static, QuantFormat
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_nodes
|
||||
|
||||
|
||||
class TestOpTranspose(unittest.TestCase):
    """Quantization test for Transpose in QOperator and QDQ formats."""

    def input_feeds(self, n, name2shape):
        """Build a ``TestDataFeeds`` reader holding ``n`` random feed dicts.

        Each feed maps every name in ``name2shape`` to a float32 array of the
        given shape, filled with integers drawn from [-1, 2).
        """
        feeds = [
            {
                name: np.random.randint(-1, 2, shape).astype(np.float32)
                for name, shape in name2shape.items()
            }
            for _ in range(n)
        ]
        return TestDataFeeds(feeds)

    def construct_model_matmul_transpose(self, output_model_path, input_shape, weight_shape, output_shape):
        """Save an opset-11 model with the following topology::

             (input)
                |
              MatMul
                |
            Transpose
                |
             (output)

        The MatMul weight is a float32 initializer drawn from a normal
        distribution (mean 0, std 0.1); the Transpose uses ``perm=(1, 0)``.
        """
        # MatMul with a random weight initializer.
        weight_values = np.random.normal(0, 0.1, weight_shape).astype(np.float32)
        initializers = [onnx.numpy_helper.from_array(weight_values, name='matmul_weight')]
        matmul_node = onnx.helper.make_node('MatMul', ['input', 'matmul_weight'], ['matmul_output'],
                                            name='matmul_node')

        # Transpose swapping the two axes of the MatMul result.
        transpose_node = onnx.helper.make_node('Transpose', ['matmul_output'], ['output'],
                                               name="transpose_node", perm=(1, 0))

        # Graph inputs and outputs.
        graph_inputs = [helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape)]
        graph_outputs = [helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)]

        graph = helper.make_graph([matmul_node, transpose_node], 'Transpose_Quant_Test',
                                  graph_inputs, graph_outputs, initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])
        model.ir_version = onnx.IR_VERSION

        onnx.save(model, output_model_path)

    def test_quantize_transpose(self):
        """Quantize a MatMul+Transpose model statically and check both formats."""
        np.random.seed(1)
        model_fp32_path = 'transpose_fp32.onnx'
        model_uint8_path = 'transpose_uint8.onnx'
        model_uint8_qdq_path = 'transpose_uint8_qdq.onnx'

        self.construct_model_matmul_transpose(model_fp32_path, [3, 7], [7, 5], [5, 3])

        def transpose_input_was_renamed(node):
            # Passes for every node except a Transpose that still reads the
            # original float tensor 'matmul_output'.
            return node.name != "transpose_node" or node.input[0] != 'matmul_output'

        # --- QOperator model ---
        data_reader = self.input_feeds(1, {'input': [3, 7]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # Transpose must have become an xint8 operator; its input name tells us.
        check_op_nodes(self, model_uint8_path, transpose_input_was_renamed)
        check_op_type_count(self, model_uint8_path,
                            QLinearMatMul=1, QuantizeLinear=1, DequantizeLinear=1, Transpose=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # --- QDQ model ---
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        check_op_type_count(self, model_uint8_qdq_path,
                            MatMul=1, QuantizeLinear=2, DequantizeLinear=3, Transpose=1)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Reference in a new issue