onnxruntime/orttraining/tools/scripts/opset12_model_transform.py
ytaous e0334f177c
Opset12 upgrade for existing models used by perf/e2e pipelines (#4238)
* opset12 support

* opset12 support

* on comments

Co-authored-by: Ethan Tao <ettao@microsoft.com>
2020-06-15 14:26:53 -07:00

133 lines
4.7 KiB
Python

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
#
# This converter is an internal util to upgrade existing bert/gpt-2 models,
# which were previously transformed/optimized from the original model, to Opset 12
# version, as well as replacing the deprecated node, i.e., TrainableDropout, with
# the "Dropout" node matching the Opset 12 spec. Typically, a model to
# be run by this script would have an "_optimized" substring in its model name,
# and the graph should have one or more "TrainableDropout" nodes in its graph.
# Example usage:
# python opset12_model_transform.py bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm.onnx
# Output:
# bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx
import sys
import onnx
from onnx import helper, shape_inference
from onnx import TensorProto
import numpy as np
from onnx import numpy_helper
# --- command-line handling -------------------------------------------------
# Usage: python opset12_model_transform.py <model_path.onnx>
if len(sys.argv) < 2:
    print("Please give model path...")
    # sys.exit is the canonical way to abort a script; the bare exit() builtin
    # is injected by the `site` module and is not guaranteed to be available.
    sys.exit(1)
input_model_name = sys.argv[1]
# Derive the output name by inserting "_opset12" before the extension.
# NOTE: this assumes the input path ends in ".onnx" (5 characters).
output_model_name = input_model_name[:-5] + '_opset12.onnx'
model = onnx.load(input_model_name)
def find_input_node(model, arg):
    """Return the unique node that produces tensor `arg` as an output.

    Returns None when no producer, or more than one producer, is found
    (one entry is collected per matching output name, so a node listing
    `arg` twice counts twice).
    """
    producers = [
        candidate
        for candidate in model.graph.node
        for out_name in candidate.output
        if out_name == arg
    ]
    return producers[0] if len(producers) == 1 else None
def find_output_node(model, arg):
    """Return the unique node that consumes tensor `arg` as an input.

    Returns None when no consumer, or more than one consumer, is found
    (one entry is collected per matching input name, so a node listing
    `arg` twice counts twice).
    """
    consumers = [
        candidate
        for candidate in model.graph.node
        for in_name in candidate.input
        if in_name == arg
    ]
    return consumers[0] if len(consumers) == 1 else None
def find_input(model, arg):
    """Return the graph initializer named `arg`, or None if absent."""
    matches = (init for init in model.graph.initializer if init.name == arg)
    return next(matches, None)
def get_node_index(model, node):
    """Return the position of `node` in model.graph.node, or None if absent.

    Replaces the original C-style while loop (with stray semicolons) with
    the idiomatic enumerate + early return; behavior is unchanged.
    """
    for i, candidate in enumerate(model.graph.node):
        if candidate == node:
            return i
    return None
def add_const(model, name, output, t_value=None, f_value=None):
    """Append a Constant node named `name` producing tensor `output`.

    Exactly one of `t_value` (a TensorProto) or `f_value` (a float) should
    be supplied; `t_value` wins when both are given. Returns the new node.
    """
    const_node = model.graph.node.add()
    const_node.op_type = 'Constant'
    const_node.name = name
    const_node.output.extend([output])
    value_attr = const_node.attribute.add()
    value_attr.name = 'value'
    if t_value is None:
        value_attr.type = 1  # AttributeProto.FLOAT
        value_attr.f = f_value
    else:
        value_attr.type = 4  # AttributeProto.TENSOR
        value_attr.t.CopyFrom(t_value)
    return const_node
def process_trainabledropout(model):
    """Replace every TrainableDropout node with an Opset-12 Dropout node.

    Opset 12 folds training behaviour into the standard op:
    Dropout(data, ratio, training_mode). For each TrainableDropout this
      * creates a new Dropout node (with an empty 'seed' INT attribute),
      * re-emits the old 1-D ratio Constant as a rank-0 float32 Constant,
      * adds a scalar boolean 'training_mode' Constant fixed to True,
      * wires the three inputs in, reuses the old outputs, and finally
        deletes the replaced nodes (highest index first, so earlier
        indices stay valid).

    NOTE(review): new nodes are appended to model.graph.node while it is
    being iterated; they are never TrainableDropout so they are skipped,
    but this relies on protobuf repeated-field iteration seeing appends.
    """
    delete_nodes = []
    index = 0  # keeps generated node/output names unique
    for node in model.graph.node:
        if node.op_type != 'TrainableDropout':
            continue
        new_dropout = model.graph.node.add()
        new_dropout.op_type = 'Dropout'
        new_dropout.name = 'Dropout_%d' % index
        # add seed attribute (type 2 == AttributeProto.INT)
        attr = new_dropout.attribute.add()
        attr.name = 'seed'
        attr.type = 2
        # find the old ratio Constant feeding input 1 of TrainableDropout
        ratio_node = find_input_node(model, node.input[1])
        assert ratio_node.op_type == 'Constant'
        delete_nodes.append(get_node_index(model, ratio_node))
        # make a scalar (rank-0) ratio node, as the Opset-12 spec requires
        ratio_attr = ratio_node.attribute
        ratio_data = numpy_helper.to_array(ratio_attr[0].t)
        ratio_scalar = ratio_data.astype(np.float32).reshape(())
        ratio_value = numpy_helper.from_array(ratio_scalar, "ratio")
        new_ratio_node = add_const(model, 'dropout_ratio_node_%d' % index, 'dropout_ratio_%d' % index, t_value=ratio_value)
        index += 1
        # add scalar training_mode input, always True for training graphs.
        # np.bool (an alias of builtin bool) was removed in NumPy 1.24;
        # the builtin is the exact drop-in replacement.
        mode_scalar = np.asarray([True]).astype(bool).reshape(())
        mode_value = numpy_helper.from_array(mode_scalar, "training_mode")
        training_mode_node = add_const(model, 'dropout_training_mode_node_%d' % index, 'dropout_training_mode_%d' % index, t_value=mode_value)
        index += 1
        new_dropout.input.extend([node.input[0], new_ratio_node.output[0], training_mode_node.output[0]])
        new_dropout.output.extend(node.output)
        delete_nodes.append(get_node_index(model, node))
        index += 1
    # delete from the back so earlier recorded indices remain valid
    delete_nodes.sort(reverse=True)
    for d in delete_nodes:
        del model.graph.node[d]
def align_attention_mask_dim(model):
    """Make the batch dimension of the 'attention_mask' input symbolic.

    Rewrites dim 0 of any graph input named 'attention_mask' to the
    symbolic dimension "batch"; other inputs are left untouched.
    """
    for graph_input in model.graph.input:
        if graph_input.name != "attention_mask":
            continue
        graph_input.type.tensor_type.shape.dim[0].dim_param = "batch"
# --- main transformation pipeline ------------------------------------------
# Swap every deprecated TrainableDropout for an Opset-12 Dropout node.
process_trainabledropout(model)
# Some gpt-2 models (large ones) still don't have this input corrected.
align_attention_mask_dim(model)
# Declare the model as Opset 12 (first opset_import entry).
model.opset_import[0].version = 12
# Serialize the transformed model alongside the input.
with open(output_model_name, "wb") as f:
    f.write(model.SerializeToString())
#
# To verify the converted model in case of bert, refer to the code at the end of model_transform.py
#