mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-27 03:11:28 +00:00
Add unit test for EmbedLayerNormalization quantization op. (#8033)
This commit is contained in:
parent
e6225c62a5
commit
1d7f44a832
1 changed file with 144 additions and 0 deletions
144
onnxruntime/test/python/quantization/test_op_embed_layernorm.py
Normal file
144
onnxruntime/test/python/quantization/test_op_embed_layernorm.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import unittest
|
||||
import onnx
|
||||
import numpy as np
|
||||
from onnx import helper, TensorProto
|
||||
from onnxruntime.quantization import quantize_dynamic
|
||||
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
|
||||
|
||||
|
||||
class TestOpEmbedLayerNormalization(unittest.TestCase):
    """Dynamic-quantization tests for a single EmbedLayerNormalization node.

    Each test builds a float32 model containing one ``com.microsoft``
    EmbedLayerNormalization node, quantizes it with ``quantize_dynamic`` and
    then checks the quantized model's op counts and its outputs against the
    float32 model.
    """

    def input_feeds_int32(self, n, name2shape):
        """Build a ``TestDataFeeds`` reader holding ``n`` all-ones int32 feeds.

        Args:
            n: Number of feed dictionaries to generate.
            name2shape: Mapping from graph input name to the shape of the
                tensor to feed for that input.

        Returns:
            A ``TestDataFeeds`` wrapping the list of generated feed dicts.
        """
        # Every feed gets its own dict and its own freshly allocated arrays.
        input_data_list = [
            {name: np.ones(shape).astype(np.int32) for name, shape in name2shape.items()}
            for _ in range(n)
        ]
        return TestDataFeeds(input_data_list)

    def construct_model(self, batch, hidden_size, sequence_length, model_path):
        """Create a one-node EmbedLayerNormalization model and save it.

        Args:
            batch: Batch dimension of the ``input_ids`` / ``segment_ids`` inputs.
            hidden_size: Width of every embedding table and of the layer-norm
                weight and bias vectors.
            sequence_length: Second dimension of the two int32 inputs.
            model_path: Destination path passed to ``onnx.save``.
        """
        #    <segment_ids>    <input_ids>
        #              \        /
        #       (EmbedLayerNormalization)
        #              /        \
        # <layernorm_output>  <mask_index_output>

        def random_initializer(name, shape):
            # Random float32 weights wrapped as a named ONNX initializer.
            weights = np.random.random_sample(shape).astype(dtype='float32')
            return onnx.numpy_helper.from_array(weights, name=name)

        # Inputs to EmbedLayerNormalizationNode
        ids_shape = [batch, sequence_length]
        input_ids_tensor = helper.make_tensor_value_info('input_ids', TensorProto.INT32, ids_shape)
        segment_ids_tensor = helper.make_tensor_value_info('segment_ids', TensorProto.INT32, ids_shape)

        # EmbedLayerNormalization Node Constants and Weights.
        # The creation order below fixes the order of the random draws.
        # NOTE(review): the table heights (32, 16, 2) presumably need to cover
        # the ids fed in and sequence_length - confirm before changing them.
        initializers = [
            random_initializer('word_embed', [32, hidden_size]),
            random_initializer('pos_embed', [16, hidden_size]),
            random_initializer('seg_embed', [2, hidden_size]),
            random_initializer('layer_norm_weight', [hidden_size]),
            random_initializer('layer_norm_bias', [hidden_size]),
        ]

        # EmbedLayerNormalization Outputs:
        layernorm_out_tensor = helper.make_tensor_value_info(
            'layernorm_out', TensorProto.FLOAT, [batch, sequence_length, hidden_size])
        mask_index_out_tensor = helper.make_tensor_value_info(
            'mask_index_out', TensorProto.INT32, [batch])

        # EmbedLayerNormalization Node:
        embed_layer_norm_inputs = [
            'input_ids', 'segment_ids', 'word_embed', 'pos_embed', 'seg_embed', 'layer_norm_weight', 'layer_norm_bias'
        ]
        embed_layer_norm_outputs = ['layernorm_out', 'mask_index_out']
        embed_layer_norm_node = helper.make_node('EmbedLayerNormalization',
                                                 embed_layer_norm_inputs,
                                                 embed_layer_norm_outputs,
                                                 domain='com.microsoft')

        # Construct the Graph and Model:
        graph = helper.make_graph([embed_layer_norm_node],
                                  'embed_layernorm_graph',
                                  [input_ids_tensor, segment_ids_tensor],
                                  [layernorm_out_tensor, mask_index_out_tensor],
                                  initializer=initializers)
        model = helper.make_model(graph)

        onnx.save(model, model_path)

    def _run_quantize_test(self, batch, hidden_size=4, sequence_length=4):
        """Build, dynamically quantize and verify a model for one batch size.

        Shared body of the ``test_quantize_batch_size_*`` cases. The model
        files are written to the current working directory.

        Args:
            batch: Batch size used for the model inputs and the input feed.
            hidden_size: Hidden size forwarded to ``construct_model``.
            sequence_length: Sequence length of the model inputs and the feed.
        """
        model_f32_path = 'test_embed_layer_norm_unit_test_batch{}.onnx'.format(batch)
        # The original spelled this prefix 'ttest_'; it now matches the float model.
        model_uint8_path = 'test_embed_layer_norm_unit_test_batch{}_uint8.onnx'.format(batch)

        self.construct_model(batch, hidden_size, sequence_length, model_f32_path)

        data_reader = self.input_feeds_int32(1, {
            'input_ids': [batch, sequence_length],
            'segment_ids': [batch, sequence_length]
        })

        quantize_dynamic(model_f32_path, model_uint8_path)

        # NOTE(review): 3 is the count the original test asserts - presumably one
        # DequantizeLinear per quantized embedding table; confirm against the quantizer.
        qnode_counts = {'DequantizeLinear': 3}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()

        check_model_correctness(self, model_f32_path, model_uint8_path, data_reader.get_next())

    def test_quantize_batch_size_1(self):
        """Quantize and verify the model with a batch size of 1."""
        self._run_quantize_test(batch=1)

    def test_quantize_batch_size_2(self):
        """Quantize and verify the model with a batch size of 2."""
        self._run_quantize_test(batch=2)
|
||||
|
||||
|
||||
# Entry point: lets this test module be executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
Loading…
Reference in a new issue