move longformer and t5 to models subdirectory (#11161)

* move longformer scripts to models subdirectory
* Copy transformers\models\t5 to python package as well
This commit is contained in:
Tianlei Wu 2022-04-09 22:35:14 -07:00 committed by GitHub
parent f24523e0eb
commit 00b595e389
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 33 deletions

View file

@ -364,8 +364,11 @@ file(GLOB onnxruntime_python_quantization_cal_table_flatbuffers_src CONFIGURE_DE
file(GLOB onnxruntime_python_transformers_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/*.py"
)
file(GLOB onnxruntime_python_transformers_longformer_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/longformer/*.py"
file(GLOB onnxruntime_python_transformers_models_longformer_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/longformer/*.py"
)
file(GLOB onnxruntime_python_transformers_models_t5_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/t5/*.py"
)
file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/datasets/*.py"
@ -416,7 +419,9 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model/ort_flatbuffers_py
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/operators
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/CalTableFlatBuffers
@ -497,8 +502,11 @@ add_custom_command(
${onnxruntime_python_transformers_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_longformer_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer/
${onnxruntime_python_transformers_models_longformer_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_models_t5_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5/
COMMAND ${CMAKE_COMMAND} -E copy
${REPO_ROOT}/VERSION_NUMBER
$<TARGET_FILE_DIR:${build_output_target}>

View file

@ -38,22 +38,24 @@ The tool will also verify whether the ONNX model and corresponding PyTorch model
### Longformer Model conversion
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment like the following:
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment with PyTorch 1.9.* like the following:
```
conda create -n longformer python=3.6
conda create -n longformer python=3.8
conda activate longformer
conda install pytorch torchvision torchaudio cpuonly -c pytorch
pip install onnx transformers onnxruntime
pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
pip install onnx transformers==4.18.0 onnxruntime numpy
```
Next, get the source of [torch extensions for Longformer exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions), and run the following:
Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions) like the following:
```
cd onnxruntime/python/tools/transformers/models/longformer/torch_extensions
python setup.py install
```
It will generate file like "build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so" under the directory.
It will generate a PyTorch extension file like "build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so" under the directory.
Finally, use [convert_longformer_to_onnx](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py) to convert to ONNX model like the following:
Finally, convert longformer model to ONNX model like the following:
```
python convert_longformer_to_onnx.py -m longformer-base-4096
cd ..
python convert_to_onnx.py -m longformer-base-4096
```
The exported ONNX model can only run on GPU right now.

View file

@ -5,19 +5,17 @@
# --------------------------------------------------------------------------
#
# This script runs a benchmark of latency or peak memory usage of Longformer model inference.
# Please run convert_to_onnx.py to get onnx model before running benchmark.
#
# Please run convert_longformer_to_onnx.py to get onnx model before running this script.
# Tested with python 3.6, onnxruntime-gpu 1.7.0, PyTorch 1.7.1, transformers 4.3.2, CUDA 10.2.
#
# Example commands for exporting longformer base model in Linux or WSL:
# cd ../torch_extensions
# python setup.py install
# cd ../longformer
# python convert_longformer_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
# It is tested with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.11.0, transformers 4.18.0, CUDA 11.3 like the following
# conda create -n gpu_env python=3.8
# conda activate gpu_env
# pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
# pip3 install onnx transformers onnxruntime-gpu numpy sympy coloredlogs psutil py3nvml
# python benchmark_longformer.py
#
# When there is no parameter, all available tests (memory & latency) will run on the longformer-base-4096 pretrained model.
# python benchmark_longformer.py
#
# Benchmark the latency (Exported onnx model is in the current directory):
# python benchmark_longformer.py --model longformer-base-4096 --batch_sizes 1 --sequence_lengths 512 1024 2048 4096 --global_lengths 8 --onnx ./longformer-base-4096_fp16.onnx --validate_onnx -t 100
#
@ -41,7 +39,7 @@ import math
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
import benchmark_helper

View file

@ -7,13 +7,21 @@
# This script converts Longformer model from huggingface transformers 4.0 or later to ONNX.
# Unlike normal ONNX model exporting, it will directly translate LongformerSelfAttention to the LongformerAttention operator in ONNX Runtime.
#
# Before running this script, please run "python setup.py install" in ../torch_extensions under Linux with PyTorch installed.
# Before running this script, please run "python setup.py install" in ./torch_extensions under Linux with PyTorch installed.
# Then you can update the path of longformer_attention.cpython-*.so and run this script in same environment.
#
# It is tested in Ubuntu 18.04, python 3.6, PyTorch 1.7.1, transformers 4.3.0 or 4.3.2.
# It is tested in Ubuntu 18.04 with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.9.0, transformers 4.18.0.
# Warning: Using a newer version (1.10 or 1.11) of PyTorch might encounter issues in exporting, but they are fine for benchmarking.
#
# Example commands for exporting longformer base model in Linux:
# cd ./torch_extensions
# python setup.py install
# cd ..
# python convert_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
#
# GPU is not needed for this script. You can run it in CPU. For --optimize_onnx, you can use either onnxruntime or onnxruntime-gpu package.
#
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or above.
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or newer version.
import sys
import os
@ -27,7 +35,7 @@ from packaging import version
from pathlib import Path
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from torch_onnx_export_helper import torch_onnx_export
@ -48,9 +56,9 @@ def my_longformer_attention(g, input, weight, bias, mask, global_weight, global_
# namespace is onnxruntime which is registered in longformer_attention.cpp
register_custom_op_symbolic('onnxruntime::LongformerAttention', my_longformer_attention, 9)
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.6
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.8
torch.ops.load_library(
r'../torch_extensions/build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so')
r'./torch_extensions/build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so')
def parse_arguments():
@ -231,7 +239,7 @@ def export_longformer(model, onnx_model_path, export_padding):
torch_onnx_export(model,
example_inputs,
onnx_model_path,
opset_version=11,
opset_version=12,
input_names=["input_ids", "attention_mask", "global_attention_mask"],
output_names=["last_state", "pooler"],
dynamic_axes={

View file

@ -13,7 +13,7 @@ import random
from pathlib import Path
from onnx import ModelProto, TensorProto, numpy_helper
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from onnx_model import OnnxModel
from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data

View file

@ -319,7 +319,8 @@ packages = [
'onnxruntime.quantization.operators',
'onnxruntime.quantization.CalTableFlatBuffers',
'onnxruntime.transformers',
'onnxruntime.transformers.longformer',
'onnxruntime.transformers.models.t5',
'onnxruntime.transformers.models.longformer',
]
requirements_file = "requirements.txt"