Add more huggingface models in benchmark tools (#4986)

* checkin more huggingface models * review comments * review comments
2026-07-08 17:17:15 +00:00 · 2020-09-02 16:41:58 -07:00 · 2020-09-02 16:41:58 -07:00 · b4e9e98cee
commit b4e9e98cee
parent a935731bd3
3 changed files with 138 additions and 23 deletions
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@ -56,26 +56,7 @@ from onnx_exporter import create_onnxruntime_input, load_pretrained_model, expor

 logger = logging.getLogger('')

-# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
-# Pretrained model name to a tuple of input names, opset_version, use_external_data_format and optimization model type
-MODELS = {
-    "bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
-    "distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "roberta-base": (["input_ids", "attention_mask"], 11, False, "bert"),
-
-    # No past state inputs for GPT models.
-    "gpt2": (["input_ids"], 11, False, "gpt2"),  # no past state inputs & outputs
-    "gpt2-large": (["input_ids"], 11, True, "gpt2"),  # Model>2GB. Need use_external_data_format=True to export it.
-    "distilgpt2": (["input_ids"], 11, False, "gpt2"),  # no past state inputs & outputs
-
-    #"openai-gpt": (["input_ids"], 11, False, "gpt2"),  # no past state inputs
-
-    # Models uses Einsum, which need opset version 12 and PyTorch 1.5.0 or above.
-    "albert-base-v2": (["input_ids"], 12, False, "bert"),
-    #"xlnet-base-cased": (["input_ids"], 12, False, "bert"),
-
-    #"xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
-}
+from huggingface_models import MODELS

 cpu_count = psutil.cpu_count(logical=True)
 # Set OMP environment variable before importing onnxruntime or torch.
@ -85,7 +66,6 @@ if "OMP_NUM_THREADS" not in os.environ:
 import torch
 from transformers import (AutoConfig, AutoTokenizer, AutoModel, GPT2Model)

-
 def run_onnxruntime(use_gpu, model_names, precision, batch_sizes, sequence_lengths, repeat_times, input_counts,
                    optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite, disable_ort_io_binding,
                    use_raw_attention_mask, thread_num, model_fusion_statistics):
@ -187,7 +167,9 @@ def run_pytorch(use_gpu, model_names, precision, batch_sizes, sequence_lengths,
        config = AutoConfig.from_pretrained(model_name, torchscript=torchscript, cache_dir=cache_dir)
        model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-        max_input_size = tokenizer.max_model_input_sizes[model_name]
+
+        max_input_size = tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024
+
        logger.debug(f"Model {model}")
        logger.debug(f"Number of parameters {model.num_parameters()}")

--- a/onnxruntime/python/tools/transformers/huggingface_models.py
+++ b/onnxruntime/python/tools/transformers/huggingface_models.py
@ -0,0 +1,111 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
+# Maps each pretrained model name to a tuple of (input names, opset_version, use_external_data_format, optimization model type).
+MODELS = {
+# BERT
+    "bert-base-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-multilingual-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-multilingual-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-chinese": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-german-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-uncased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-cased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-cased-finetuned-mrpc": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-german-dbmdz-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    "bert-base-german-dbmdz-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
+    # todo: more models to add
+# GPT
+    "openai-gpt": (["input_ids"], 11, False, "gpt2"),  # no past state inputs
+# GPT-2
+    "gpt2": (["input_ids"], 11, False, "gpt2"),  # no past state inputs & outputs
+    "gpt2-medium": (["input_ids"], 11, False, "gpt2"),
+    "gpt2-large": (["input_ids"], 11, True, "gpt2"),  # Model>2GB. Need use_external_data_format=True to export it. No past state inputs for GPT models.
+    "gpt2-xl": (["input_ids"], 11, True, "gpt2"),
+    "distilgpt2": (["input_ids"], 11, False, "gpt2"),  # no past state inputs & outputs
+# Transformer-XL
+    #"transfo-xl-wt103": (["input_ids"], 11, False, "bert"),
+# XLNet
+    #"xlnet-base-cased": (["input_ids"], 12, False, "bert"), # Model uses Einsum, which needs opset version 12 and PyTorch 1.5.0 or above.
+    #"xlnet-large-cased": (["input_ids"], 12, False, "bert"), # Model uses Einsum, which needs opset version 12 and PyTorch 1.5.0 or above.
+# XLM
+    "xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
+    "xlm-mlm-ende-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-enfr-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-enro-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-xnli15-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-tlm-xnli15-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-clm-enfr-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-clm-ende-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-17-1280": (["input_ids"], 11, True, "bert"),
+    "xlm-mlm-100-1280": (["input_ids"], 11, True, "bert"),
+# RoBERTa
+    "roberta-base": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "roberta-large": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "roberta-large-mnli": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "distilroberta-base": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "roberta-base-openai-detector": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "roberta-large-openai-detector": (["input_ids", "attention_mask"], 11, False, "bert"),
+# DistilBERT
+    "distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"), 
+    "distilbert-base-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "distilbert-base-cased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "distilbert-base-german-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
+    "distilbert-base-multilingual-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
+# CTRL
+    "ctrl": (["input_ids"], 11, True, "bert"),
+# CamemBERT
+    "camembert-base": (["input_ids"], 11, False, "bert"),
+# ALBERT
+    # These models use Einsum, which needs opset version 12 and PyTorch 1.5.0 or above.
+    "albert-base-v1": (["input_ids"], 12, False, "bert"),
+    "albert-large-v1": (["input_ids"], 12, False, "bert"),
+    "albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
+    "albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
+    "albert-base-v2": (["input_ids"], 12, False, "bert"),
+    "albert-large-v2": (["input_ids"], 12, False, "bert"),
+    "albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
+    "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
+# T5
+    #"t5-small": (["input_ids"], 11, False, "bert"),
+    #"t5-base": (["input_ids"], 11, False, "bert"),
+    #"t5-large": (["input_ids"], 11, False, "bert"),
+    #"t5-3b": (["input_ids"], 11, False, "bert"),
+    #"t5-11b": (["input_ids"], 11, False, "bert"),
+# XLM-RoBERTa
+    "xlm-roberta-base": (["input_ids"], 11, False, "bert"),
+    "xlm-roberta-large": (["input_ids"], 11, True, "bert"),
+# FlauBERT
+    "flaubert/flaubert_small_cased": (["input_ids"], 11, False, "bert"),
+    "flaubert/flaubert_base_uncased": (["input_ids"], 11, False, "bert"),
+    "flaubert/flaubert_base_cased": (["input_ids"], 11, False, "bert"),
+    "flaubert/flaubert_large_cased": (["input_ids"], 11, False, "bert"),
+# Bart
+    #"facebook/bart-large": (["input_ids"], 11, False, "bert"),
+    #"facebook/bart-base": (["input_ids"], 11, False, "bert"),
+    #"facebook/bart-large-mnli": (["input_ids"], 11, False, "bert"),
+    #"facebook/bart-large-cnn": (["input_ids"], 11, False, "bert"),
+    #"facebook/mbart-large-en-ro": (["input_ids"], 11, True, "bert"),
+# DialoGPT
+    "microsoft/DialoGPT-small": (["input_ids"], 11, False, "gpt2"),
+    "microsoft/DialoGPT-medium": (["input_ids"], 11, False, "gpt2"),
+    "microsoft/DialoGPT-large": (["input_ids"], 11, True, "gpt2"),
+# Reformer
+    #"google/reformer-enwik8": (["input_ids"], 11, False, "bert"),
+    #"google/reformer-crime-and-punishment": (["input_ids"], 11, False, "bert"),
+# MarianMT
+    #"Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
+# Longformer
+    #"allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
+    #"allenai/longformer-large-4096": (["input_ids"], 12, False, "bert"),
+}
--- a/onnxruntime/python/tools/transformers/onnx_exporter.py
+++ b/onnxruntime/python/tools/transformers/onnx_exporter.py
@ -15,6 +15,24 @@ from quantize_helper import QuantizeHelper

 logger = logging.getLogger(__name__)

+# Workaround: replace torch.triu with a self-defined op during export,
+# since torch.triu cannot be exported to ONNX. See https://github.com/pytorch/pytorch/issues/32968
+torch_func = {"triu" : torch.triu}  # keeps a reference to the original torch.triu so it can be restored later
+
+def triu_onnx(x, diagonal=0, out=None):  # stand-in for torch.triu, limited to square 2-D tensors
+    assert out is None  # the `out=` argument is not supported by this replacement
+    assert len(x.shape) == 2 and x.size(0) == x.size(1)  # only square 2-D inputs are handled
+
+    torch_triu = torch_func["triu"]  # use the saved original, since torch.triu may currently point at this function
+    template = torch_triu(torch.ones((1024, 1024), dtype=torch.uint8), diagonal)  # fixed 1024x1024 upper-triangular 0/1 mask
+    mask = template[:x.size(0),:x.size(1)]  # crop the mask to x's size; NOTE(review): the crop cannot exceed 1024x1024 -- confirm inputs never do
+    return torch.where(mask.bool(), x, torch.zeros_like(x))  # keep x where the mask is set, zero elsewhere
+
+def replace_torch_functions():  # monkey-patch torch with the replacement ops defined in this module
+    torch.triu = triu_onnx  # global change: affects every torch.triu caller until restore_torch_functions() runs
+
+def restore_torch_functions():  # undo the patch applied by replace_torch_functions()
+    torch.triu = torch_func["triu"]  # put back the original torch.triu saved in torch_func

 def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names):
    input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=numpy.int64)
@ -201,6 +219,7 @@ def export_onnx_model(model_name, opset_version, use_external_data_format, model

        dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten)

+        replace_torch_functions()
        torch.onnx.export(model=model,
                          args=tuple(example_inputs.values()),
                          f=onnx_model_path,
@ -211,6 +230,7 @@ def export_onnx_model(model_name, opset_version, use_external_data_format, model
                          do_constant_folding=True,
                          opset_version=opset_version,
                          use_external_data_format=use_external_data_format)
+        restore_torch_functions()
    else:
        logger.info(f"Skip export since model existed: {onnx_model_path}")

@ -242,4 +262,6 @@ def export_onnx_model(model_name, opset_version, use_external_data_format, model
                                                use_external_data_format)
            optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwrite, model_fusion_statistics)

-    return onnx_model_path, is_valid_onnx_model, config.vocab_size, tokenizer.max_model_input_sizes[model_name]
+    max_input_size = tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024
+
+    return onnx_model_path, is_valid_onnx_model, config.vocab_size, max_input_size