mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Opset12 upgrade for existing models used by perf/e2e pipelines (#4238)
* opset12 support * opset12 support * on comments Co-authored-by: Ethan Tao <ettao@microsoft.com>
This commit is contained in:
parent
4486c66ed4
commit
e0334f177c
7 changed files with 146 additions and 13 deletions
|
|
@ -11,8 +11,8 @@ import urllib.request
|
|||
import zipfile
|
||||
|
||||
# update these if the E2E test data changes
|
||||
ARCHIVE_BLOB_URL = "https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-04-14T02:10:05.3158245Z"
|
||||
ARCHIVE_SHA256_DIGEST = "ea4168a801ded478f4e2af08232cb1174913caac300d5bf73b2652dc6894372c"
|
||||
ARCHIVE_BLOB_URL = "https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data_v12.zip?snapshot=2020-06-13T06:24:15.0833240Z"
|
||||
ARCHIVE_SHA256_DIGEST = "B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9"
|
||||
|
||||
def _download(url, local_path):
|
||||
urllib.request.urlretrieve(url, local_path)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
step,total_loss,mlm_loss,nsp_loss
|
||||
0,11.2031,10.4979,0.707195
|
||||
0,11.2422,10.5228,0.717476
|
||||
5,10.1875,7.75453,2.43238
|
||||
10,8.33594,7.63755,0.697193
|
||||
10,8.42188,7.63755,0.792425
|
||||
15,8.35156,7.60502,0.744699
|
||||
20,8.22656,7.48076,0.749099
|
||||
25,8.27344,7.56207,0.71167
|
||||
20,8.22656,7.4854,0.749099
|
||||
25,8.29688,7.56207,0.73899
|
||||
30,8.125,7.40926,0.716592
|
||||
35,7.95703,7.26281,0.694741
|
||||
35,7.99219,7.26281,0.726583
|
||||
40,7.94531,7.26573,0.679934
|
||||
45,7.93359,7.27335,0.661407
|
||||
45,7.94141,7.27335,0.668663
|
||||
|
|
|
|||
|
|
|
@ -35,7 +35,7 @@ def main():
|
|||
os.path.join(args.binary_dir, "onnxruntime_training_bert"),
|
||||
"--model_name", os.path.join(
|
||||
args.model_root,
|
||||
"nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm"),
|
||||
"nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12"),
|
||||
"--train_batch_size", str(config.max_batch_size),
|
||||
"--mode", "perf",
|
||||
"--max_seq_length", str(config.sequence_length),
|
||||
|
|
@ -57,7 +57,7 @@ def main():
|
|||
if config.enable_mixed_precision:
|
||||
cmds.append("--use_mixed_precision"),
|
||||
|
||||
subprocess.run(cmds, timeout=60).check_returncode()
|
||||
subprocess.run(cmds, timeout=120).check_returncode()
|
||||
|
||||
return 0
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ def main():
|
|||
cmds = [
|
||||
os.path.join(args.binary_dir, "onnxruntime_training_bert"),
|
||||
"--model_name", os.path.join(
|
||||
args.model_root, "nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm"),
|
||||
args.model_root, "nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12"),
|
||||
"--train_data_dir", os.path.join(
|
||||
args.training_data_root, str(c.max_seq_length), "books_wiki_en_corpus/train"),
|
||||
"--test_data_dir", os.path.join(
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ def main():
|
|||
subprocess.run([
|
||||
os.path.join(args.binary_dir, "onnxruntime_training_bert"),
|
||||
"--model_name", os.path.join(
|
||||
args.model_root, "nv/bert-base/bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm"),
|
||||
args.model_root, "nv/bert-base/bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12"),
|
||||
"--train_data_dir", os.path.join(
|
||||
args.training_data_root, "128/books_wiki_en_corpus/train"),
|
||||
"--test_data_dir", os.path.join(
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ def main():
|
|||
cmds = [
|
||||
os.path.join(args.binary_dir, "onnxruntime_training_gpt2"),
|
||||
"--model_name", os.path.join(
|
||||
args.model_root, "megatron-gpt2_hidden-size-1024_num-layers-24_vocab-size-50257_num-attention-heads-16_max-position-embeddings-1024_optimized"),
|
||||
args.model_root, "megatron-gpt2_hidden-size-1024_num-layers-24_vocab-size-50257_num-attention-heads-16_max-position-embeddings-1024_optimized_opset12"),
|
||||
"--train_data_dir", os.path.join(
|
||||
args.training_data_root, "train"),
|
||||
"--test_data_dir", os.path.join(
|
||||
|
|
|
|||
133
orttraining/tools/scripts/opset12_model_transform.py
Normal file
133
orttraining/tools/scripts/opset12_model_transform.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
#
|
||||
# This converter is an internal util to upgrade existing bert/gpt-2 models,
|
||||
# which were previously transformed/optimized from the original model, to Opset 12
|
||||
# version as well as replacing deprecated node, i.e., TrainableDropout with
|
||||
# the "Dropout" node matching the Opset 12 Spec. Typically, a model to
|
||||
# be run by this script would have "_optimized" substring in its model name,
|
||||
# and the graph should have one or more "TrainableDropout" nodes in its graph.
|
||||
# Example usage:
|
||||
# python opset12_model_transform.py bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm.onnx
|
||||
# Output:
|
||||
# bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx
|
||||
|
||||
import sys
|
||||
import onnx
|
||||
from onnx import helper, shape_inference
|
||||
from onnx import TensorProto
|
||||
import numpy as np
|
||||
from onnx import numpy_helper
|
||||
|
||||
# --- command-line handling -------------------------------------------------
if len(sys.argv) < 2:
    # Use sys.exit (always available) rather than the site-module builtin
    # exit(), and send the usage message to stderr, not stdout.
    print("Usage: python opset12_model_transform.py <model.onnx>", file=sys.stderr)
    sys.exit(1)

input_model_name = sys.argv[1]
# Strip the trailing ".onnx" (5 chars) and tag the output with the target opset.
output_model_name = input_model_name[:-5] + '_opset12.onnx'

model = onnx.load(input_model_name)
|
||||
|
||||
def find_input_node(model, arg):
    """Return the single graph node that produces output *arg*, else None."""
    producers = [n for n in model.graph.node for out in n.output if out == arg]
    return producers[0] if len(producers) == 1 else None
|
||||
|
||||
def find_output_node(model, arg):
    """Return the single graph node that consumes *arg* as an input, else None."""
    consumers = [n for n in model.graph.node for inp in n.input if inp == arg]
    return consumers[0] if len(consumers) == 1 else None
|
||||
|
||||
def find_input(model, arg):
    """Look up the graph initializer named *arg*; return None when absent."""
    matches = (init for init in model.graph.initializer if init.name == arg)
    return next(matches, None)
|
||||
|
||||
def get_node_index(model, node):
    """Return the index of *node* within model.graph.node, or None if absent.

    Replaces the original while/break loop with stray C-style semicolons
    by the idiomatic enumerate + early return; behavior is unchanged.
    """
    for i, candidate in enumerate(model.graph.node):
        if candidate == node:
            return i
    return None
|
||||
|
||||
def add_const(model, name, output, t_value = None, f_value = None):
    """Append a Constant node named *name* that emits *output*.

    Exactly one of t_value (a TensorProto) or f_value (a float) supplies the
    constant's value. Returns the newly created node.
    """
    const_node = model.graph.node.add()
    const_node.op_type = 'Constant'
    const_node.name = name
    const_node.output.extend([output])
    value_attr = const_node.attribute.add()
    value_attr.name = 'value'
    if t_value is None:
        value_attr.type = 1  # AttributeProto.FLOAT
        value_attr.f = f_value
    else:
        value_attr.type = 4  # AttributeProto.TENSOR
        value_attr.t.CopyFrom(t_value)
    return const_node
|
||||
|
||||
def process_trainabledropout(model):
    """Replace every deprecated TrainableDropout node with an opset-12 Dropout.

    Opset-12 Dropout takes the ratio and training_mode as *inputs* rather
    than attributes, so for each TrainableDropout node we:
      * create a new Dropout node carrying a 'seed' attribute,
      * convert the old Constant ratio tensor into a scalar Constant input,
      * add a scalar boolean 'training_mode' Constant input set to True,
    and finally delete the replaced nodes.
    """
    delete_nodes = []
    index = 0
    for node in model.graph.node:
        if node.op_type == 'TrainableDropout':
            new_dropout = model.graph.node.add()
            new_dropout.op_type = 'Dropout'
            new_dropout.name = 'Dropout_%d' % index
            # add seed attribute (AttributeProto.INT; value left at default 0)
            attr = new_dropout.attribute.add()
            attr.name = 'seed'
            attr.type = 2
            # find old ratio node (must be a Constant feeding input[1])
            ratio_node = find_input_node(model, node.input[1])
            assert ratio_node.op_type == 'Constant'
            delete_nodes.append(get_node_index(model, ratio_node))
            # make ratio scalar node
            ratio_attr = ratio_node.attribute
            ratio_data = numpy_helper.to_array(ratio_attr[0].t)
            ratio_scalar = ratio_data.astype(np.float32).reshape(())
            ratio_value = numpy_helper.from_array(ratio_scalar, "ratio")
            new_ratio_node = add_const(model, 'dropout_ratio_node_%d' % index, 'dropout_ratio_%d' % index, t_value=ratio_value)
            index += 1
            # add training_mode input
            # NOTE: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
            # it was an alias for the builtin bool used here.
            mode_scalar = np.asarray([True]).astype(bool).reshape(())
            mode_value = numpy_helper.from_array(mode_scalar, "training_mode")
            training_mode_node = add_const(model, 'dropout_training_mode_node_%d' % index, 'dropout_training_mode_%d' % index, t_value=mode_value)
            index += 1

            new_dropout.input.extend([node.input[0], new_ratio_node.output[0], training_mode_node.output[0]])
            new_dropout.output.extend(node.output)
            delete_nodes.append(get_node_index(model, node))
            index += 1

    # delete from the back so earlier recorded indices stay valid
    delete_nodes.sort(reverse=True)
    for d in delete_nodes:
        del model.graph.node[d]
|
||||
|
||||
def align_attention_mask_dim(model):
    """Make the leading (batch) dimension of the attention_mask input symbolic."""
    for graph_input in model.graph.input:
        if graph_input.name != "attention_mask":
            continue
        graph_input.type.tensor_type.shape.dim[0].dim_param = "batch"
|
||||
|
||||
|
||||
# Replace deprecated TrainableDropout nodes with opset-12 Dropout nodes.
process_trainabledropout(model)
# some gpt-2 models (large ones) still don't have this input corrected
align_attention_mask_dim(model)

# Set the model's (default-domain) opset version to 12.
model.opset_import[0].version = 12

# Serialize the transformed model next to the input, with an _opset12 suffix.
with open (output_model_name, "wb") as f:
    f.write(model.SerializeToString())

#
# To verify the converted model in case of bert, refer to the code at the end of model_transform.py
#
|
||||
Loading…
Reference in a new issue