mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
move longformer and t5 to models subdirectory (#11161)
* move longformer scripts to models subdirectory * Copy transformers\models\t5 to python package as well
This commit is contained in:
parent
f24523e0eb
commit
00b595e389
10 changed files with 50 additions and 33 deletions
|
|
@ -364,8 +364,11 @@ file(GLOB onnxruntime_python_quantization_cal_table_flatbuffers_src CONFIGURE_DE
|
|||
file(GLOB onnxruntime_python_transformers_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/*.py"
|
||||
)
|
||||
file(GLOB onnxruntime_python_transformers_longformer_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/longformer/*.py"
|
||||
file(GLOB onnxruntime_python_transformers_models_longformer_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/longformer/*.py"
|
||||
)
|
||||
file(GLOB onnxruntime_python_transformers_models_t5_src CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/t5/*.py"
|
||||
)
|
||||
file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
|
||||
"${ONNXRUNTIME_ROOT}/python/datasets/*.py"
|
||||
|
|
@ -416,7 +419,9 @@ add_custom_command(
|
|||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model/ort_flatbuffers_py
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/operators
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/CalTableFlatBuffers
|
||||
|
|
@ -497,8 +502,11 @@ add_custom_command(
|
|||
${onnxruntime_python_transformers_src}
|
||||
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${onnxruntime_python_transformers_longformer_src}
|
||||
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer/
|
||||
${onnxruntime_python_transformers_models_longformer_src}
|
||||
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer/
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${onnxruntime_python_transformers_models_t5_src}
|
||||
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5/
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${REPO_ROOT}/VERSION_NUMBER
|
||||
$<TARGET_FILE_DIR:${build_output_target}>
|
||||
|
|
|
|||
|
|
@ -38,22 +38,24 @@ The tool will also verify whether the ONNX model and corresponding PyTorch model
|
|||
|
||||
### Longformer Model conversion
|
||||
|
||||
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment like the following:
|
||||
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment with PyTorch 1.9.* like the following:
|
||||
```
|
||||
conda create -n longformer python=3.6
|
||||
conda create -n longformer python=3.8
|
||||
conda activate longformer
|
||||
conda install pytorch torchvision torchaudio cpuonly -c pytorch
|
||||
pip install onnx transformers onnxruntime
|
||||
pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
|
||||
pip install onnx transformers==4.18.0 onnxruntime numpy
|
||||
```
|
||||
Next, get the source of [torch extensions for Longformer exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions), and run the following:
|
||||
Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/models/longformer/torch_extensions) like the following:
|
||||
```
|
||||
cd onnxruntime/python/tools/transformers/models/longformer/torch_extensions
|
||||
python setup.py install
|
||||
```
|
||||
It will generate file like "build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so" under the directory.
|
||||
It will generate a PyTorch extension file like "build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so" under the directory.
|
||||
|
||||
Finally, use [convert_longformer_to_onnx](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py) to convert to ONNX model like the following:
|
||||
Finally, convert longformer model to ONNX model like the following:
|
||||
```
|
||||
python convert_longformer_to_onnx.py -m longformer-base-4096
|
||||
cd ..
|
||||
python convert_to_onnx.py -m longformer-base-4096
|
||||
```
|
||||
|
||||
The exported ONNX model can only run on GPU right now.
|
||||
|
|
|
|||
|
|
@ -5,19 +5,17 @@
|
|||
# --------------------------------------------------------------------------
|
||||
#
|
||||
# This script runs a benchmark of latency or peak memory usage of Longformer model inference.
|
||||
# Please run convert_to_onnx.py to get onnx model before running benchmark.
|
||||
#
|
||||
# Please run convert_longformer_to_onnx.py to get onnx model before running this script.
|
||||
# Tested with python 3.6, onnxruntime-gpu 1.7.0, PyTorch 1.7.1, transformers 4.3.2, CUDA 10.2.
|
||||
#
|
||||
# Example commands for exporting longformer base model in Linux or WSL:
|
||||
# cd ../torch_extensions
|
||||
# python setup.py install
|
||||
# cd ../longformer
|
||||
# python convert_longformer_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
|
||||
# It is tested with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.11.0, transformers 4.18.0, CUDA 11.3 like the following:
|
||||
# conda create -n gpu_env python=3.8
|
||||
# conda activate gpu_env
|
||||
# pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
||||
# pip3 install onnx transformers onnxruntime-gpu numpy sympy coloredlogs psutil py3nvml
|
||||
# python benchmark_longformer.py
|
||||
#
|
||||
# When there is no parameter, all available tests (memory & latency) will run on the longformer-base-4096 pretrained model.
|
||||
# python benchmark_longformer.py
|
||||
#
|
||||
|
||||
# Benchmark the latency (Exported onnx model is in the current directory):
|
||||
# python benchmark_longformer.py --model longformer-base-4096 --batch_sizes 1 --sequence_lengths 512 1024 2048 4096 --global_lengths 8 --onnx ./longformer-base-4096_fp16.onnx --validate_onnx -t 100
|
||||
#
|
||||
|
|
@ -41,7 +39,7 @@ import math
|
|||
|
||||
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
import benchmark_helper
|
||||
|
||||
|
||||
|
|
@ -7,13 +7,21 @@
|
|||
# This script converts Longformer model from huggingface transformers 4.0 or later to ONNX.
|
||||
# Unlike normal ONNX model exporting, it will directly translate LongformerSelfAttention to the LongformerAttention operator in ONNX Runtime.
|
||||
#
|
||||
# Before running this script, please run "python setup.py install" in ../torch_extensions under Linux with PyTorch installed.
|
||||
# Before running this script, please run "python setup.py install" in ./torch_extensions under Linux with PyTorch installed.
|
||||
# Then you can update the path of longformer_attention.cpython-*.so and run this script in same environment.
|
||||
#
|
||||
# It is tested in Ubuntu 18.04, python 3.6, PyTorch 1.7.1, transformers 4.3.0 or 4.3.2.
|
||||
# It is tested in Ubuntu 18.04 with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.9.0, transformers 4.18.0.
|
||||
# Warning: Using a newer version (1.10 or 1.11) of PyTorch might encounter issues in exporting, but those versions are fine for benchmarking.
|
||||
#
|
||||
# Example commands for exporting longformer base model in Linux:
|
||||
# cd ./torch_extensions
|
||||
# python setup.py install
|
||||
# cd ..
|
||||
# python convert_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
|
||||
#
|
||||
# GPU is not needed for this script. You can run it in CPU. For --optimize_onnx, you can use either onnxruntime or onnxruntime-gpu package.
|
||||
#
|
||||
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or above.
|
||||
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or a newer version.
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
|
@ -27,7 +35,7 @@ from packaging import version
|
|||
from pathlib import Path
|
||||
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
from torch_onnx_export_helper import torch_onnx_export
|
||||
|
||||
|
||||
|
|
@ -48,9 +56,9 @@ def my_longformer_attention(g, input, weight, bias, mask, global_weight, global_
|
|||
# namespace is onnxruntime which is registered in longformer_attention.cpp
|
||||
register_custom_op_symbolic('onnxruntime::LongformerAttention', my_longformer_attention, 9)
|
||||
|
||||
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.6
|
||||
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.8
|
||||
torch.ops.load_library(
|
||||
r'../torch_extensions/build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so')
|
||||
r'./torch_extensions/build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so')
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
|
|
@ -231,7 +239,7 @@ def export_longformer(model, onnx_model_path, export_padding):
|
|||
torch_onnx_export(model,
|
||||
example_inputs,
|
||||
onnx_model_path,
|
||||
opset_version=11,
|
||||
opset_version=12,
|
||||
input_names=["input_ids", "attention_mask", "global_attention_mask"],
|
||||
output_names=["last_state", "pooler"],
|
||||
dynamic_axes={
|
||||
|
|
@ -13,7 +13,7 @@ import random
|
|||
from pathlib import Path
|
||||
from onnx import ModelProto, TensorProto, numpy_helper
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
from onnx_model import OnnxModel
|
||||
from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data
|
||||
|
||||
3
setup.py
3
setup.py
|
|
@ -319,7 +319,8 @@ packages = [
|
|||
'onnxruntime.quantization.operators',
|
||||
'onnxruntime.quantization.CalTableFlatBuffers',
|
||||
'onnxruntime.transformers',
|
||||
'onnxruntime.transformers.longformer',
|
||||
'onnxruntime.transformers.models.t5',
|
||||
'onnxruntime.transformers.models.longformer',
|
||||
]
|
||||
|
||||
requirements_file = "requirements.txt"
|
||||
|
|
|
|||
Loading…
Reference in a new issue