mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Add transformers tools to python package (#5090)
* Add transformers to onnxruntime python package
This commit is contained in:
parent
61051396e8
commit
c5d4ae0401
7 changed files with 61 additions and 36 deletions
|
|
@ -208,6 +208,11 @@ file(GLOB onnxruntime_python_quantization_src CONFIGURE_DEPENDS
|
|||
file(GLOB onnxruntime_python_quantization_operators_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/quantization/operators/*.py"
|
||||
)
|
||||
file(GLOB onnxruntime_python_transformers_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/*.py"
|
||||
)
|
||||
list(REMOVE_ITEM onnxruntime_python_transformers_src
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/test_optimizer.py")
|
||||
file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/datasets/*.py"
|
||||
)
|
||||
|
|
@ -226,6 +231,7 @@ add_custom_command(
|
|||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/datasets
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/tools
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/tools/featurizer_ops
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/transformers
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/quantization
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/quantization/operators
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
|
|
@ -273,6 +279,9 @@ add_custom_command(
|
|||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${onnxruntime_python_quantization_operators_src}
|
||||
$<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/quantization/operators/
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${onnxruntime_python_transformers_src}
|
||||
$<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/transformers/
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${REPO_ROOT}/VERSION_NUMBER
|
||||
$<TARGET_FILE_DIR:${test_data_target}>
|
||||
|
|
|
|||
4
onnxruntime/python/tools/transformers/__init__.py
Normal file
4
onnxruntime/python/tools/transformers/__init__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(__file__))
|
||||
|
|
@ -66,9 +66,10 @@ if "OMP_NUM_THREADS" not in os.environ:
|
|||
import torch
|
||||
from transformers import (AutoConfig, AutoTokenizer, AutoModel, GPT2Model)
|
||||
|
||||
def run_onnxruntime(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times, input_counts,
|
||||
optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite, disable_ort_io_binding,
|
||||
use_raw_attention_mask, thread_num, model_fusion_statistics):
|
||||
|
||||
def run_onnxruntime(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times,
|
||||
input_counts, optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
|
||||
disable_ort_io_binding, use_raw_attention_mask, thread_num, model_fusion_statistics):
|
||||
import onnxruntime
|
||||
|
||||
results = []
|
||||
|
|
@ -154,8 +155,8 @@ def run_onnxruntime(use_gpu, model_names, model_class, precision, batch_sizes, s
|
|||
return results
|
||||
|
||||
|
||||
def run_pytorch(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times, torchscript, cache_dir,
|
||||
verbose):
|
||||
def run_pytorch(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times, torchscript,
|
||||
cache_dir, verbose):
|
||||
results = []
|
||||
if use_gpu and not torch.cuda.is_available():
|
||||
logger.error("Please install PyTorch with Cuda, and use a machine with GPU for testing gpu performance.")
|
||||
|
|
@ -168,7 +169,8 @@ def run_pytorch(use_gpu, model_names, model_class, precision, batch_sizes, seque
|
|||
model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir, custom_model_class=model_class)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
|
||||
|
||||
max_input_size = tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024
|
||||
max_input_size = tokenizer.max_model_input_sizes[
|
||||
model_name] if model_name in tokenizer.max_model_input_sizes else 1024
|
||||
|
||||
logger.debug(f"Model {model}")
|
||||
logger.debug(f"Number of parameters {model.num_parameters()}")
|
||||
|
|
@ -365,12 +367,12 @@ def main():
|
|||
logger.warning("--input_counts is not implemented for torch or torchscript engine.")
|
||||
|
||||
if enable_torchscript:
|
||||
results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes, args.sequence_lengths,
|
||||
args.test_times, True, args.cache_dir, args.verbose)
|
||||
results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes,
|
||||
args.sequence_lengths, args.test_times, True, args.cache_dir, args.verbose)
|
||||
|
||||
if enable_torch:
|
||||
results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes, args.sequence_lengths,
|
||||
args.test_times, False, args.cache_dir, args.verbose)
|
||||
results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes,
|
||||
args.sequence_lengths, args.test_times, False, args.cache_dir, args.verbose)
|
||||
|
||||
model_fusion_statistics = {}
|
||||
if enable_onnxruntime:
|
||||
|
|
|
|||
|
|
@ -440,7 +440,7 @@ class Gpt2Helper:
|
|||
def onnxruntime_inference_with_binded_io(ort_session,
|
||||
inputs: Gpt2Inputs,
|
||||
output_buffers: Dict[str, torch.Tensor],
|
||||
output_shapes : Dict[str, List[int]],
|
||||
output_shapes: Dict[str, List[int]],
|
||||
total_runs: int = 0,
|
||||
return_numpy: bool = True,
|
||||
include_copy_output_latency: bool = False):
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ MODEL_CLASSES = {
|
|||
# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
|
||||
# Maps each pretrained model name to a tuple of (input names, opset_version, use_external_data_format, optimization model type).
|
||||
MODELS = {
|
||||
# BERT
|
||||
# BERT
|
||||
"bert-base-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
|
|
@ -31,26 +31,30 @@ MODELS = {
|
|||
"bert-base-german-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-uncased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-cased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
|
||||
"token_type_ids"], 11, False, "bert"),
|
||||
"bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
|
||||
"token_type_ids"], 11, False, "bert"),
|
||||
"bert-base-cased-finetuned-mrpc": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-base-german-dbmdz-cased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
"bert-base-german-dbmdz-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
|
||||
# todo: more models to add
|
||||
# GPT
|
||||
# GPT
|
||||
"openai-gpt": (["input_ids"], 11, False, "gpt2"), # no past state inputs
|
||||
# GPT-2
|
||||
# GPT-2
|
||||
"gpt2": (["input_ids"], 11, False, "gpt2"), # no past state inputs & outputs
|
||||
"gpt2-medium": (["input_ids"], 11, False, "gpt2"),
|
||||
"gpt2-large": (["input_ids"], 11, True, "gpt2"), # Model>2GB. Need use_external_data_format=True to export it. No past state inputs for GPT models.
|
||||
"gpt2-large":
|
||||
(["input_ids"], 11, True,
|
||||
"gpt2"), # Model>2GB. Need use_external_data_format=True to export it. No past state inputs for GPT models.
|
||||
"gpt2-xl": (["input_ids"], 11, True, "gpt2"),
|
||||
"distilgpt2": (["input_ids"], 11, False, "gpt2"), # no past state inputs & outputs
|
||||
# Transformer-XL
|
||||
# Transformer-XL
|
||||
#"transfo-xl-wt103": (["input_ids"], 11, False, "bert"),
|
||||
# XLNet
|
||||
# XLNet
|
||||
#"xlnet-base-cased": (["input_ids"], 12, False, "bert"), # Models uses Einsum, which need opset version 12 and PyTorch 1.5.0 or above.
|
||||
#"xlnet-large-cased": (["input_ids"], 12, False, "bert"), # Models uses Einsum, which need opset version 12 and PyTorch 1.5.0 or above.
|
||||
# XLM
|
||||
# XLM
|
||||
"xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
|
||||
"xlm-mlm-ende-1024": (["input_ids"], 11, False, "bert"),
|
||||
"xlm-mlm-enfr-1024": (["input_ids"], 11, False, "bert"),
|
||||
|
|
@ -61,25 +65,25 @@ MODELS = {
|
|||
"xlm-clm-ende-1024": (["input_ids"], 11, False, "bert"),
|
||||
"xlm-mlm-17-1280": (["input_ids"], 11, True, "bert"),
|
||||
"xlm-mlm-100-1280": (["input_ids"], 11, True, "bert"),
|
||||
# RoBERTa
|
||||
# RoBERTa
|
||||
"roberta-base": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"roberta-large": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"roberta-large-mnli": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilroberta-base": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"roberta-base-openai-detector": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"roberta-large-openai-detector": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
# DistilBERT
|
||||
# DistilBERT
|
||||
"distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilbert-base-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilbert-base-cased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilbert-base-german-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
"distilbert-base-multilingual-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
|
||||
# CTRL
|
||||
# CTRL
|
||||
"ctrl": (["input_ids"], 11, True, "bert"),
|
||||
# CamemBERT
|
||||
# CamemBERT
|
||||
"camembert-base": (["input_ids"], 11, False, "bert"),
|
||||
# ALBERT
|
||||
# ALBERT
|
||||
# These models use Einsum, which needs opset version 12 and PyTorch 1.5.0 or above.
|
||||
"albert-base-v1": (["input_ids"], 12, False, "bert"),
|
||||
"albert-large-v1": (["input_ids"], 12, False, "bert"),
|
||||
|
|
@ -89,36 +93,36 @@ MODELS = {
|
|||
"albert-large-v2": (["input_ids"], 12, False, "bert"),
|
||||
"albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
|
||||
"albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
|
||||
# T5
|
||||
# T5
|
||||
#"t5-small": (["input_ids"], 11, False, "bert"),
|
||||
#"t5-base": (["input_ids"], 11, False, "bert"),
|
||||
#"t5-large": (["input_ids"], 11, False, "bert"),
|
||||
#"t5-3b": (["input_ids"], 11, False, "bert"),
|
||||
#"t5-11b": (["input_ids"], 11, False, "bert"),
|
||||
# XLM-RoBERTa
|
||||
# XLM-RoBERTa
|
||||
"xlm-roberta-base": (["input_ids"], 11, False, "bert"),
|
||||
"xlm-roberta-large": (["input_ids"], 11, True, "bert"),
|
||||
# FlauBERT
|
||||
# FlauBERT
|
||||
"flaubert/flaubert_small_cased": (["input_ids"], 11, False, "bert"),
|
||||
"flaubert/flaubert_base_uncased": (["input_ids"], 11, False, "bert"),
|
||||
"flaubert/flaubert_base_cased": (["input_ids"], 11, False, "bert"),
|
||||
"flaubert/flaubert_large_cased": (["input_ids"], 11, False, "bert"),
|
||||
# Bart
|
||||
# Bart
|
||||
#"facebook/bart-large": (["input_ids"], 11, False, "bert"),
|
||||
#"facebook/bart-base": (["input_ids"], 11, False, "bert"),
|
||||
#"facebook/bart-large-mnli": (["input_ids"], 11, False, "bert"),
|
||||
#"facebook/bart-large-cnn": (["input_ids"], 11, False, "bert"),
|
||||
#"facebook/mbart-large-en-ro": (["input_ids"], 11, True, "bert"),
|
||||
# DialoGPT
|
||||
# DialoGPT
|
||||
"microsoft/DialoGPT-small": (["input_ids"], 11, False, "gpt2"),
|
||||
"microsoft/DialoGPT-medium": (["input_ids"], 11, False, "gpt2"),
|
||||
"microsoft/DialoGPT-large": (["input_ids"], 11, True, "gpt2"),
|
||||
# Reformer
|
||||
# Reformer
|
||||
#"google/reformer-enwik8": (["input_ids"], 11, False, "bert"),
|
||||
#"google/reformer-crime-and-punishment": (["input_ids"], 11, False, "bert"),
|
||||
# MarianMT
|
||||
# MarianMT
|
||||
#"Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
|
||||
# Longformer
|
||||
# Longformer
|
||||
#"allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
|
||||
#"allenai/longformer-large-4096": (["input_ids"], 12, False, "bert"),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Workaround: replace torch.triu with a self-defined op
|
||||
# torch.triu cannot be exported to ONNX. See https://github.com/pytorch/pytorch/issues/32968
|
||||
torch_func = {"triu" : torch.triu}
|
||||
torch_func = {"triu": torch.triu}
|
||||
|
||||
|
||||
def triu_onnx(x, diagonal=0, out=None):
|
||||
assert out is None
|
||||
|
|
@ -26,15 +27,18 @@ def triu_onnx(x, diagonal=0, out=None):
|
|||
|
||||
torch_triu = torch_func["triu"]
|
||||
template = torch_triu(torch.ones((1024, 1024), dtype=torch.uint8), diagonal)
|
||||
mask = template[:x.size(0),:x.size(1)]
|
||||
mask = template[:x.size(0), :x.size(1)]
|
||||
return torch.where(mask.bool(), x, torch.zeros_like(x))
|
||||
|
||||
|
||||
def replace_torch_functions():
|
||||
torch.triu = triu_onnx
|
||||
|
||||
|
||||
def restore_torch_functions():
|
||||
torch.triu = torch_func["triu"]
|
||||
|
||||
|
||||
def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names):
|
||||
input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=numpy.int64)
|
||||
|
||||
|
|
@ -283,6 +287,7 @@ def export_onnx_model(model_name, opset_version, use_external_data_format, model
|
|||
use_external_data_format)
|
||||
optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwrite, model_fusion_statistics)
|
||||
|
||||
max_input_size = tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024
|
||||
max_input_size = tokenizer.max_model_input_sizes[
|
||||
model_name] if model_name in tokenizer.max_model_input_sizes else 1024
|
||||
|
||||
return onnx_model_path, is_valid_onnx_model, config.vocab_size, max_input_size
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -236,6 +236,7 @@ packages = [
|
|||
'onnxruntime.tools',
|
||||
'onnxruntime.quantization',
|
||||
'onnxruntime.quantization.operators',
|
||||
'onnxruntime.transformers',
|
||||
]
|
||||
|
||||
if '--enable_training' in sys.argv:
|
||||
|
|
|
|||
Loading…
Reference in a new issue