From 95e6da7957ea901cabc438f53500f16c976bd1f1 Mon Sep 17 00:00:00 2001 From: Ye Wang <52801275+wangyems@users.noreply.github.com> Date: Fri, 6 Nov 2020 11:54:19 -0800 Subject: [PATCH] Revert saving optimized model as external data (#5690) * revert and add support for saving external data * review comments * update --- .../tools/transformers/fusion_attention.py | 15 ++++++++++----- .../tools/transformers/huggingface_models.py | 19 +++++++++++++------ .../tools/transformers/onnx_exporter.py | 13 ++++++++----- .../python/tools/transformers/onnx_model.py | 5 ----- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index f1fb7369b1..041d3dd0f6 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -131,13 +131,15 @@ class FusionAttention(Fusion): weight = helper.make_tensor(name=attention_node_name + '_qkv_weight', data_type=TensorProto.FLOAT, dims=[self.hidden_size, 3 * self.hidden_size], - vals=qkv_weight.flatten().tolist()) + vals=bytes(qkv_weight.flatten()), + raw=True) self.model.add_initializer(weight) bias = helper.make_tensor(name=attention_node_name + '_qkv_bias', data_type=TensorProto.FLOAT, dims=[3 * self.hidden_size], - vals=qkv_bias.flatten().tolist()) + vals=bytes(qkv_bias.flatten()), + raw=True) self.model.add_initializer(bias) attnetion_inputs = [input, attention_node_name + '_qkv_weight', attention_node_name + '_qkv_bias'] @@ -291,9 +293,12 @@ class FusionAttention(Fusion): if einsum_node is not None: unique_index = einsum_node.input[0] new_edge = "edge_modified_" + unique_index - shape_tensor = self.model.convert_list_to_tensor( - "shape_modified_tensor" + unique_index, TensorProto.INT64, [4], - [0, 0, self.num_heads, int(self.hidden_size / self.num_heads)]) + shape_tensor = helper.make_tensor( + name="shape_modified_tensor" + unique_index, + 
data_type=TensorProto.INT64, + dims=[4], + vals=np.int64([0, 0, self.num_heads, int(self.hidden_size / self.num_heads)]).tobytes(), + raw=True) self.model.add_initializer(shape_tensor) self.model.add_node( helper.make_node("Reshape", [attention_last_node.output[0], shape_tensor.name], [new_edge], diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index 91ce85e1fc..d16f63384f 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -6,10 +6,17 @@ # Maps model class name to a tuple of model class MODEL_CLASSES = [ - 'AutoModel', - 'AutoModelWithLMHead', - 'AutoModelForSequenceClassification', - 'AutoModelForQuestionAnswering' + 'AutoModel', 'AutoModelWithLMHead', 'AutoModelForSequenceClassification', 'AutoModelForQuestionAnswering' +] + +# List of models that require external data saving for onnx export but do not require it when saving optimized onnx model +# Very few models in the huggingface list require it for both: albert-xxlarge-v1, albert-xxlarge-v2 +# TODO: most of the models in the below exempt list have runtime issues when saving these optimized onnx models +# using external data format. 
Need to address the issue in the future +EXEMPT_MODELS = [ + "gpt2-large", "gpt2-xl", "xlm-mlm-en-2048", "xlm-mlm-17-1280", "xlm-mlm-100-1280", "ctrl", "albert-xlarge-v1", + "albert-xlarge-v2", "t5-large", "t5-3b", "t5-11b", "xlm-roberta-large", "microsoft/DialoGPT-large", + "facebook/mbart-large-en-ro" ] # List of pretrained models: https://huggingface.co/transformers/pretrained_models.html @@ -83,11 +90,11 @@ MODELS = { "albert-base-v1": (["input_ids"], 12, False, "bert"), "albert-large-v1": (["input_ids"], 12, False, "bert"), "albert-xlarge-v1": (["input_ids"], 12, True, "bert"), - "albert-xxlarge-v1": (["input_ids"], 12, True, "bert"), + #"albert-xxlarge-v1": (["input_ids"], 12, True, "bert"), "albert-base-v2": (["input_ids"], 12, False, "bert"), "albert-large-v2": (["input_ids"], 12, False, "bert"), "albert-xlarge-v2": (["input_ids"], 12, True, "bert"), - "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"), + #"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"), # T5 "t5-small": (["input_ids"], 12, False, "bert"), "t5-base": (["input_ids"], 12, False, "bert"), diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 1a36e0f7ea..1bd6bf25a6 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -13,7 +13,7 @@ from transformers import AutoConfig, AutoTokenizer, AutoModel from benchmark_helper import create_onnxruntime_session, Precision from gpt2_helper import GPT2ModelNoPastState, PRETRAINED_GPT2_MODELS from quantize_helper import QuantizeHelper -from huggingface_models import MODEL_CLASSES +from huggingface_models import MODEL_CLASSES, EXEMPT_MODELS logger = logging.getLogger(__name__) @@ -169,8 +169,8 @@ def optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwri logger.info(f"Skip optimization since model existed: {ort_model_path}") -def optimize_onnx_model(onnx_model_path, 
optimized_model_path, model_type, num_attention_heads, hidden_size, use_gpu, - precision, use_raw_attention_mask, overwrite, model_fusion_statistics, +def optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, num_attention_heads, hidden_size, + use_gpu, precision, use_raw_attention_mask, overwrite, model_fusion_statistics, use_external_data_format): if overwrite or not os.path.exists(optimized_model_path): Path(optimized_model_path).parent.mkdir(parents=True, exist_ok=True) @@ -202,6 +202,10 @@ def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_a if Precision.FLOAT16 == precision: opt_model.convert_model_float32_to_float16() + + if model_name in EXEMPT_MODELS: + use_external_data_format = False + opt_model.save_model_to_file(optimized_model_path, use_external_data_format) else: logger.info(f"Skip optimization since model existed: {optimized_model_path}") @@ -291,7 +295,7 @@ def validate_and_optimize_onnx(model_name, use_external_data_format, model_type, if optimize_onnx or precision == Precision.FLOAT16 or precision == Precision.INT8: # Use script (optimizer.py) to optimize optimized_model_path = get_onnx_file_path(onnx_dir, model_name, len(input_names), True, use_gpu, precision, False, use_external_data_format) - optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, config.num_attention_heads, + optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, config.num_attention_heads, config.hidden_size, use_gpu, precision, use_raw_attention_mask, overwrite, model_fusion_statistics, use_external_data_format) @@ -419,4 +423,3 @@ def export_onnx_model_from_tf(model_name, opset_version, use_external_data_forma example_inputs, example_outputs_flatten) return onnx_model_file, is_valid_onnx_model, vocab_size, max_input_size - diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 45233dfc05..694a105942 100644 
--- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -368,11 +368,6 @@ class OnnxModel: shape_list.append("?") # shall not happen return shape_list - def convert_list_to_tensor(self, name, type, shape, value): - """ Convert list to tensor - """ - return helper.make_tensor(name, type, shape, value) - def change_input_output_float32_to_float16(self): """ Change graph input and output data type from FLOAT to FLOAT16 """