mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-08 00:23:03 +00:00
Revert saving optimized model as external data (#5690)
* revert and add support for saving external data * review comments * update
This commit is contained in:
parent
71f90e08f1
commit
95e6da7957
4 changed files with 31 additions and 21 deletions
|
|
@ -131,13 +131,15 @@ class FusionAttention(Fusion):
|
|||
weight = helper.make_tensor(name=attention_node_name + '_qkv_weight',
|
||||
data_type=TensorProto.FLOAT,
|
||||
dims=[self.hidden_size, 3 * self.hidden_size],
|
||||
vals=qkv_weight.flatten().tolist())
|
||||
vals=bytes(qkv_weight.flatten()),
|
||||
raw=True)
|
||||
self.model.add_initializer(weight)
|
||||
|
||||
bias = helper.make_tensor(name=attention_node_name + '_qkv_bias',
|
||||
data_type=TensorProto.FLOAT,
|
||||
dims=[3 * self.hidden_size],
|
||||
vals=qkv_bias.flatten().tolist())
|
||||
vals=bytes(qkv_bias.flatten()),
|
||||
raw=True)
|
||||
self.model.add_initializer(bias)
|
||||
|
||||
attnetion_inputs = [input, attention_node_name + '_qkv_weight', attention_node_name + '_qkv_bias']
|
||||
|
|
@ -291,9 +293,12 @@ class FusionAttention(Fusion):
|
|||
if einsum_node is not None:
|
||||
unique_index = einsum_node.input[0]
|
||||
new_edge = "edge_modified_" + unique_index
|
||||
shape_tensor = self.model.convert_list_to_tensor(
|
||||
"shape_modified_tensor" + unique_index, TensorProto.INT64, [4],
|
||||
[0, 0, self.num_heads, int(self.hidden_size / self.num_heads)])
|
||||
shape_tensor = helper.make_tensor(
|
||||
name="shape_modified_tensor" + unique_index,
|
||||
data_type=TensorProto.INT64,
|
||||
dims=[4],
|
||||
vals=np.int64([0, 0, self.num_heads, int(self.hidden_size / self.num_heads)]).tobytes(),
|
||||
raw=True)
|
||||
self.model.add_initializer(shape_tensor)
|
||||
self.model.add_node(
|
||||
helper.make_node("Reshape", [attention_last_node.output[0], shape_tensor.name], [new_edge],
|
||||
|
|
|
|||
|
|
@ -6,10 +6,17 @@
|
|||
|
||||
# List of supported model class names
|
||||
MODEL_CLASSES = [
|
||||
'AutoModel',
|
||||
'AutoModelWithLMHead',
|
||||
'AutoModelForSequenceClassification',
|
||||
'AutoModelForQuestionAnswering'
|
||||
'AutoModel', 'AutoModelWithLMHead', 'AutoModelForSequenceClassification', 'AutoModelForQuestionAnswering'
|
||||
]
|
||||
|
||||
# List of models that require external data saving for onnx export but do not require it when saving optimized onnx model
|
||||
# Very few models in the huggingface list require it for both: albert-xxlarge-v1, albert-xxlarge-v2
|
||||
# TODO: most of the models in the exempt list below have runtime issues when saving these optimized onnx models
|
||||
# using external data format. Need to address the issue in the future
|
||||
EXEMPT_MODELS = [
|
||||
"gpt2-large", "gpt2-xl", "xlm-mlm-en-2048", "xlm-mlm-17-1280", "xlm-mlm-100-1280", "ctrl", "albert-xlarge-v1",
|
||||
"albert-xlarge-v2", "t5-large", "t5-3b", "t5-11b", "xlm-roberta-large", "microsoft/DialoGPT-large",
|
||||
"facebook/mbart-large-en-ro"
|
||||
]
|
||||
|
||||
# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
|
||||
|
|
@ -83,11 +90,11 @@ MODELS = {
|
|||
"albert-base-v1": (["input_ids"], 12, False, "bert"),
|
||||
"albert-large-v1": (["input_ids"], 12, False, "bert"),
|
||||
"albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
|
||||
"albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
|
||||
#"albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
|
||||
"albert-base-v2": (["input_ids"], 12, False, "bert"),
|
||||
"albert-large-v2": (["input_ids"], 12, False, "bert"),
|
||||
"albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
|
||||
"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
|
||||
#"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
|
||||
# T5
|
||||
"t5-small": (["input_ids"], 12, False, "bert"),
|
||||
"t5-base": (["input_ids"], 12, False, "bert"),
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from transformers import AutoConfig, AutoTokenizer, AutoModel
|
|||
from benchmark_helper import create_onnxruntime_session, Precision
|
||||
from gpt2_helper import GPT2ModelNoPastState, PRETRAINED_GPT2_MODELS
|
||||
from quantize_helper import QuantizeHelper
|
||||
from huggingface_models import MODEL_CLASSES
|
||||
from huggingface_models import MODEL_CLASSES, EXEMPT_MODELS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -169,8 +169,8 @@ def optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwri
|
|||
logger.info(f"Skip optimization since model existed: {ort_model_path}")
|
||||
|
||||
|
||||
def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_attention_heads, hidden_size, use_gpu,
|
||||
precision, use_raw_attention_mask, overwrite, model_fusion_statistics,
|
||||
def optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, num_attention_heads, hidden_size,
|
||||
use_gpu, precision, use_raw_attention_mask, overwrite, model_fusion_statistics,
|
||||
use_external_data_format):
|
||||
if overwrite or not os.path.exists(optimized_model_path):
|
||||
Path(optimized_model_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -202,6 +202,10 @@ def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_a
|
|||
|
||||
if Precision.FLOAT16 == precision:
|
||||
opt_model.convert_model_float32_to_float16()
|
||||
|
||||
if model_name in EXEMPT_MODELS:
|
||||
use_external_data_format = False
|
||||
|
||||
opt_model.save_model_to_file(optimized_model_path, use_external_data_format)
|
||||
else:
|
||||
logger.info(f"Skip optimization since model existed: {optimized_model_path}")
|
||||
|
|
@ -291,7 +295,7 @@ def validate_and_optimize_onnx(model_name, use_external_data_format, model_type,
|
|||
if optimize_onnx or precision == Precision.FLOAT16 or precision == Precision.INT8: # Use script (optimizer.py) to optimize
|
||||
optimized_model_path = get_onnx_file_path(onnx_dir, model_name, len(input_names), True, use_gpu, precision,
|
||||
False, use_external_data_format)
|
||||
optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, config.num_attention_heads,
|
||||
optimize_onnx_model(model_name, onnx_model_path, optimized_model_path, model_type, config.num_attention_heads,
|
||||
config.hidden_size, use_gpu, precision, use_raw_attention_mask, overwrite,
|
||||
model_fusion_statistics, use_external_data_format)
|
||||
|
||||
|
|
@ -419,4 +423,3 @@ def export_onnx_model_from_tf(model_name, opset_version, use_external_data_forma
|
|||
example_inputs, example_outputs_flatten)
|
||||
|
||||
return onnx_model_file, is_valid_onnx_model, vocab_size, max_input_size
|
||||
|
||||
|
|
|
|||
|
|
@ -368,11 +368,6 @@ class OnnxModel:
|
|||
shape_list.append("?") # shall not happen
|
||||
return shape_list
|
||||
|
||||
def convert_list_to_tensor(self, name, type, shape, value):
|
||||
""" Convert list to tensor
|
||||
"""
|
||||
return helper.make_tensor(name, type, shape, value)
|
||||
|
||||
def change_input_output_float32_to_float16(self):
|
||||
""" Change graph input and output data type from FLOAT to FLOAT16
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue