Revert saving optimized model as external data (#5690)

* revert and add support for saving external data

* review comments

* update
This commit is contained in:
Ye Wang 2020-11-06 11:54:19 -08:00 committed by GitHub
parent 71f90e08f1
commit 95e6da7957
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 21 deletions

View file

@ -131,13 +131,15 @@ class FusionAttention(Fusion):
weight = helper.make_tensor(name=attention_node_name + '_qkv_weight',
data_type=TensorProto.FLOAT,
dims=[self.hidden_size, 3 * self.hidden_size],
vals=qkv_weight.flatten().tolist())
vals=bytes(qkv_weight.flatten()),
raw=True)
self.model.add_initializer(weight)
bias = helper.make_tensor(name=attention_node_name + '_qkv_bias',
data_type=TensorProto.FLOAT,
dims=[3 * self.hidden_size],
vals=qkv_bias.flatten().tolist())
vals=bytes(qkv_bias.flatten()),
raw=True)
self.model.add_initializer(bias)
attnetion_inputs = [input, attention_node_name + '_qkv_weight', attention_node_name + '_qkv_bias']
@ -291,9 +293,12 @@ class FusionAttention(Fusion):
if einsum_node is not None:
unique_index = einsum_node.input[0]
new_edge = "edge_modified_" + unique_index
shape_tensor = self.model.convert_list_to_tensor(
"shape_modified_tensor" + unique_index, TensorProto.INT64, [4],
[0, 0, self.num_heads, int(self.hidden_size / self.num_heads)])
shape_tensor = helper.make_tensor(
name="shape_modified_tensor" + unique_index,
data_type=TensorProto.INT64,
dims=[4],
vals=np.int64([0, 0, self.num_heads, int(self.hidden_size / self.num_heads)]).tobytes(),
raw=True)
self.model.add_initializer(shape_tensor)
self.model.add_node(
helper.make_node("Reshape", [attention_last_node.output[0], shape_tensor.name], [new_edge],

View file

@ -6,10 +6,17 @@
# List of model class names
MODEL_CLASSES = [
'AutoModel',
'AutoModelWithLMHead',
'AutoModelForSequenceClassification',
'AutoModelForQuestionAnswering'
'AutoModel', 'AutoModelWithLMHead', 'AutoModelForSequenceClassification', 'AutoModelForQuestionAnswering'
]
# Models whose ONNX export requires the external data format, but whose optimized
# ONNX model does not require it when saved.
# Very few models in the huggingface list require it for both steps:
# albert-xxlarge-v1 and albert-xxlarge-v2.
# TODO: most of the models in the exempt list below have runtime issues when their
# optimized ONNX models are saved using the external data format. This needs to be
# addressed in the future.
EXEMPT_MODELS = [
    "gpt2-large",
    "gpt2-xl",
    "xlm-mlm-en-2048",
    "xlm-mlm-17-1280",
    "xlm-mlm-100-1280",
    "ctrl",
    "albert-xlarge-v1",
    "albert-xlarge-v2",
    "t5-large",
    "t5-3b",
    "t5-11b",
    "xlm-roberta-large",
    "microsoft/DialoGPT-large",
    "facebook/mbart-large-en-ro",
]
# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
@ -83,11 +90,11 @@ MODELS = {
"albert-base-v1": (["input_ids"], 12, False, "bert"),
"albert-large-v1": (["input_ids"], 12, False, "bert"),
"albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
"albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
#"albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
"albert-base-v2": (["input_ids"], 12, False, "bert"),
"albert-large-v2": (["input_ids"], 12, False, "bert"),
"albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
#"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
# T5
"t5-small": (["input_ids"], 12, False, "bert"),
"t5-base": (["input_ids"], 12, False, "bert"),

View file

@ -13,7 +13,7 @@ from transformers import AutoConfig, AutoTokenizer, AutoModel
from benchmark_helper import create_onnxruntime_session, Precision
from gpt2_helper import GPT2ModelNoPastState, PRETRAINED_GPT2_MODELS
from quantize_helper import QuantizeHelper
from huggingface_models import MODEL_CLASSES
from huggingface_models import MODEL_CLASSES, EXEMPT_MODELS
logger = logging.getLogger(__name__)
@ -169,8 +169,8 @@ def optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwri
logger.info(f"Skip optimization since model existed: {ort_model_path}")
def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_attention_heads, hidden_size, use_gpu,
precision, use_raw_attention_mask, overwrite, model_fusion_statistics,
def optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, num_attention_heads, hidden_size,
use_gpu, precision, use_raw_attention_mask, overwrite, model_fusion_statistics,
use_external_data_format):
if overwrite or not os.path.exists(optimized_model_path):
Path(optimized_model_path).parent.mkdir(parents=True, exist_ok=True)
@ -202,6 +202,10 @@ def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_a
if Precision.FLOAT16 == precision:
opt_model.convert_model_float32_to_float16()
if model_name in EXEMPT_MODELS:
use_external_data_format = False
opt_model.save_model_to_file(optimized_model_path, use_external_data_format)
else:
logger.info(f"Skip optimization since model existed: {optimized_model_path}")
@ -291,7 +295,7 @@ def validate_and_optimize_onnx(model_name, use_external_data_format, model_type,
if optimize_onnx or precision == Precision.FLOAT16 or precision == Precision.INT8: # Use script (optimizer.py) to optimize
optimized_model_path = get_onnx_file_path(onnx_dir, model_name, len(input_names), True, use_gpu, precision,
False, use_external_data_format)
optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, config.num_attention_heads,
optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, config.num_attention_heads,
config.hidden_size, use_gpu, precision, use_raw_attention_mask, overwrite,
model_fusion_statistics, use_external_data_format)
@ -419,4 +423,3 @@ def export_onnx_model_from_tf(model_name, opset_version, use_external_data_forma
example_inputs, example_outputs_flatten)
return onnx_model_file, is_valid_onnx_model, vocab_size, max_input_size

View file

@ -368,11 +368,6 @@ class OnnxModel:
shape_list.append("?") # shall not happen
return shape_list
def convert_list_to_tensor(self, name, type, shape, value):
""" Convert list to tensor
"""
return helper.make_tensor(name, type, shape, value)
def change_input_output_float32_to_float16(self):
""" Change graph input and output data type from FLOAT to FLOAT16
"""