mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
* opset12 support * opset12 support * on comments Co-authored-by: Ethan Tao <ettao@microsoft.com>
133 lines
4.7 KiB
Python
133 lines
4.7 KiB
Python
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
#
|
|
# This converter is an internal util to upgrade existing bert/gpt-2 models,
|
|
# which were previously transformed/optimized from original model, to Opset 12
|
|
# version as well as replacing deprecated node, i.e., TrainableDropout with
|
|
# the "Dropout" node matching the Opset 12 Spec. Typically, a model to
|
|
# be run by this script would have "_optimized" substring in its model name,
|
|
# and the graph should have one or more "TrainableDropout" nodes in its graph.
|
|
# Example usage:
|
|
# python opset12_model_transform.py bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm.onnx
|
|
# Output:
|
|
# bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx
|
|
|
|
import sys
|
|
import onnx
|
|
from onnx import helper, shape_inference
|
|
from onnx import TensorProto
|
|
import numpy as np
|
|
from onnx import numpy_helper
|
|
|
|
if len(sys.argv) < 2:
|
|
print("Please give model path...")
|
|
exit(1)
|
|
|
|
input_model_name = sys.argv[1]
|
|
output_model_name = input_model_name[:-5] + '_opset12.onnx'
|
|
|
|
model = onnx.load(input_model_name)
|
|
|
|
def find_input_node(model, arg):
|
|
result = []
|
|
for node in model.graph.node:
|
|
for output in node.output:
|
|
if output == arg:
|
|
result.append(node)
|
|
return result[0] if len(result)== 1 else None
|
|
|
|
def find_output_node(model, arg):
|
|
result = []
|
|
for node in model.graph.node:
|
|
for input in node.input:
|
|
if input == arg:
|
|
result.append(node)
|
|
return result[0] if len(result) == 1 else None
|
|
|
|
def find_input(model, arg):
|
|
for initializer in model.graph.initializer:
|
|
if initializer.name == arg:
|
|
return initializer
|
|
return None
|
|
|
|
def get_node_index(model, node):
|
|
i = 0
|
|
while i < len(model.graph.node):
|
|
if model.graph.node[i] == node:
|
|
break;
|
|
i += 1
|
|
return i if i < len(model.graph.node) else None;
|
|
|
|
def add_const(model, name, output, t_value = None, f_value = None):
|
|
const_node = model.graph.node.add()
|
|
const_node.op_type = 'Constant'
|
|
const_node.name = name
|
|
const_node.output.extend([output])
|
|
attr = const_node.attribute.add()
|
|
attr.name = 'value'
|
|
if t_value is not None:
|
|
attr.type = 4
|
|
attr.t.CopyFrom(t_value)
|
|
else:
|
|
attr.type = 1
|
|
attr.f = f_value
|
|
return const_node
|
|
|
|
def process_trainabledropout(model):
|
|
delete_nodes = []
|
|
index = 0
|
|
for node in model.graph.node:
|
|
if node.op_type == 'TrainableDropout':
|
|
new_dropout = model.graph.node.add()
|
|
new_dropout.op_type = 'Dropout'
|
|
new_dropout.name = 'Dropout_%d' % index
|
|
# add seed attribute
|
|
attr = new_dropout.attribute.add()
|
|
attr.name = 'seed'
|
|
attr.type = 2
|
|
# find old ratio node
|
|
ratio_node = find_input_node(model, node.input[1])
|
|
assert ratio_node.op_type == 'Constant'
|
|
delete_nodes.append(get_node_index(model, ratio_node))
|
|
# make ratio scalar node
|
|
ratio_attr = ratio_node.attribute
|
|
ratio_data = numpy_helper.to_array(ratio_attr[0].t)
|
|
ratio_scalar = ratio_data.astype(np.float32).reshape(())
|
|
ratio_value = numpy_helper.from_array(ratio_scalar, "ratio")
|
|
new_ratio_node = add_const(model, 'dropout_ratio_node_%d' % index, 'dropout_ratio_%d' % index, t_value=ratio_value)
|
|
index+=1
|
|
# add training_mode output
|
|
mode_scalar = np.asarray([True]).astype(np.bool).reshape(())
|
|
mode_value = numpy_helper.from_array(mode_scalar, "training_mode")
|
|
training_mode_node = add_const(model, 'dropout_training_mode_node_%d' % index, 'dropout_training_mode_%d' % index, t_value=mode_value)
|
|
index+=1
|
|
|
|
new_dropout.input.extend([node.input[0], new_ratio_node.output[0], training_mode_node.output[0]])
|
|
new_dropout.output.extend(node.output)
|
|
delete_nodes.append(get_node_index(model, node))
|
|
index += 1
|
|
|
|
delete_nodes.sort(reverse=True)
|
|
for d in delete_nodes:
|
|
del model.graph.node[d]
|
|
|
|
def align_attention_mask_dim(model):
|
|
for model_input in model.graph.input:
|
|
if model_input.name == "attention_mask":
|
|
model_input.type.tensor_type.shape.dim[0].dim_param = "batch"
|
|
|
|
|
|
#replace TrainableDropout with Dropout
|
|
process_trainabledropout(model)
|
|
# some gpt-2 models (large ones) still don't have this input corrected
|
|
align_attention_mask_dim(model)
|
|
|
|
#set opset version to 12
|
|
model.opset_import[0].version = 12
|
|
|
|
with open (output_model_name, "wb") as f:
|
|
f.write(model.SerializeToString())
|
|
|
|
#
|
|
# To verify the converted model in case of bert, refer to the code at the end of model_transform.py
|
|
#
|