From 00b595e389ebdb30c3ff2c2a261040b2e5907d08 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Sat, 9 Apr 2022 22:35:14 -0700 Subject: [PATCH] move longformer and t5 to models subdirectory (#11161) * move longformer scripts to models subdirectory * Copy transformers\models\t5 to python package as well --- cmake/onnxruntime_python.cmake | 18 ++++++++++----- .../python/tools/transformers/README.md | 18 ++++++++------- .../{ => models}/longformer/__init__.py | 0 .../longformer/benchmark_longformer.py | 20 ++++++++--------- .../longformer/convert_to_onnx.py} | 22 +++++++++++++------ .../longformer/generate_test_data.py | 2 +- .../longformer/longformer_helper.py | 0 .../torch_extensions/longformer_attention.cpp | 0 .../longformer}/torch_extensions/setup.py | 0 setup.py | 3 ++- 10 files changed, 50 insertions(+), 33 deletions(-) rename onnxruntime/python/tools/transformers/{ => models}/longformer/__init__.py (100%) rename onnxruntime/python/tools/transformers/{ => models}/longformer/benchmark_longformer.py (97%) rename onnxruntime/python/tools/transformers/{longformer/convert_longformer_to_onnx.py => models/longformer/convert_to_onnx.py} (94%) rename onnxruntime/python/tools/transformers/{ => models}/longformer/generate_test_data.py (99%) rename onnxruntime/python/tools/transformers/{ => models}/longformer/longformer_helper.py (100%) rename onnxruntime/python/tools/transformers/{ => models/longformer}/torch_extensions/longformer_attention.cpp (100%) rename onnxruntime/python/tools/transformers/{ => models/longformer}/torch_extensions/setup.py (100%) diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 002d8a05e5..4cd0d676c7 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -364,8 +364,11 @@ file(GLOB onnxruntime_python_quantization_cal_table_flatbuffers_src CONFIGURE_DE file(GLOB onnxruntime_python_transformers_src CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/python/tools/transformers/*.py" ) -file(GLOB 
onnxruntime_python_transformers_longformer_src CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/python/tools/transformers/longformer/*.py" +file(GLOB onnxruntime_python_transformers_models_longformer_src CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/python/tools/transformers/models/longformer/*.py" +) +file(GLOB onnxruntime_python_transformers_models_t5_src CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/python/tools/transformers/models/t5/*.py" ) file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/python/datasets/*.py" @@ -416,7 +419,9 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools/ort_format_model COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools/ort_format_model/ort_flatbuffers_py COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/longformer + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/longformer + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/t5 COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/operators COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/CalTableFlatBuffers @@ -497,8 +502,11 @@ add_custom_command( ${onnxruntime_python_transformers_src} $/onnxruntime/transformers/ COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_transformers_longformer_src} - $/onnxruntime/transformers/longformer/ + ${onnxruntime_python_transformers_models_longformer_src} + $/onnxruntime/transformers/models/longformer/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_transformers_models_t5_src} + $/onnxruntime/transformers/models/t5/ COMMAND ${CMAKE_COMMAND} -E copy ${REPO_ROOT}/VERSION_NUMBER $ diff --git a/onnxruntime/python/tools/transformers/README.md 
b/onnxruntime/python/tools/transformers/README.md index e3b423f546..5316aa2a61 100644 --- a/onnxruntime/python/tools/transformers/README.md +++ b/onnxruntime/python/tools/transformers/README.md @@ -38,22 +38,24 @@ The tool will also verify whether the ONNX model and corresponding PyTorch model ### Longformer Model conversion -Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment like the following: +Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment with PyTorch 1.9.* like the following: ``` -conda create -n longformer python=3.6 +conda create -n longformer python=3.8 conda activate longformer -conda install pytorch torchvision torchaudio cpuonly -c pytorch -pip install onnx transformers onnxruntime +pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html +pip install onnx transformers==4.18.0 onnxruntime numpy ``` -Next, get the source of [torch extensions for Longformer exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions), and run the following: +Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/models/longformer/torch_extensions) like the following: ``` +cd onnxruntime/python/tools/transformers/models/longformer/torch_extensions python setup.py install ``` -It will generate file like "build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so" under the directory. +It will generate a PyTorch extension file like "build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so" under the directory. 
-Finally, use [convert_longformer_to_onnx](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py) to convert to ONNX model like the following: +Finally, convert longformer model to ONNX model like the following: ``` -python convert_longformer_to_onnx.py -m longformer-base-4096 +cd .. +python convert_to_onnx.py -m longformer-base-4096 ``` The exported ONNX model can only run in GPU right now. diff --git a/onnxruntime/python/tools/transformers/longformer/__init__.py b/onnxruntime/python/tools/transformers/models/longformer/__init__.py similarity index 100% rename from onnxruntime/python/tools/transformers/longformer/__init__.py rename to onnxruntime/python/tools/transformers/models/longformer/__init__.py diff --git a/onnxruntime/python/tools/transformers/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py similarity index 97% rename from onnxruntime/python/tools/transformers/longformer/benchmark_longformer.py rename to onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index 76431ee82e..5223521fe3 100644 --- a/onnxruntime/python/tools/transformers/longformer/benchmark_longformer.py +++ b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -5,19 +5,17 @@ # -------------------------------------------------------------------------- # # This script run benchmark of latency or peak memory usage of Longformer model inference. +# Please run convert_to_onnx.py to get onnx model before running benchmark. # -# Please run convert_longformer_to_onnx.py to get onnx model before running this script. -# Tested with python 3.6, onnxruntime-gpu 1.7.0, PyTorch 1.7.1, transformers 4.3.2, CUDA 10.2. 
-# -# Example commands for exporting longformer base model in Linux or WSL: -# cd ../torch_extensions -# python setup.py install -# cd ../longformer -# python convert_longformer_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx +# It is tested with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.11.0, transformers 4.18.0, CUDA 11.3 like the following +# conda create -n gpu_env python=3.8 +# conda activate gpu_env +# pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 +# pip3 install onnx transformers onnxruntime-gpu numpy sympy coloredlogs psutil py3nvml +# python benchmark_longformer.py # # When there is no parameter, all avaiable tests (memory & latency) will run on the longformer-base-4096 pretrained model. -# python benchmark_longformer.py -# + # Benchmark the latency (Exported onnx model is in the current directory): # python benchmark_longformer.py --model longformer-base-4096 --batch_sizes 1 --sequence_lengths 512 1024 2048 4096 --global_lengths 8 --onnx ./longformer-base-4096_fp16.onnx --validate_onnx -t 100 # @@ -41,7 +39,7 @@ import math from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) import benchmark_helper diff --git a/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py similarity index 94% rename from onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py rename to onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py index d5aac3d852..02e9473ef4 100644 --- a/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py @@ -7,13 +7,21 @@ # This script converts Longformer model from 
huggingface transformers 4.0 or later to ONNX. # Unlike normal ONNX model exporting, it will directly translate LongformerSelfAttention to the LongformerAttention operator in ONNX Runtime. # -# Before running this script, please run "python setup.py install" in ../torch_extensions under Linux with PyTorch installed. +# Before running this script, please run "python setup.py install" in ./torch_extensions under Linux with PyTorch installed. # Then you can update the path of longformer_attention.cpython-*.so and run this script in same environment. # -# It is tested in Ubuntu 18.04, python 3.6, PyTorch 1.7.1, transformers 4.3.0 or 4.3.2. +# It is tested in Ubuntu 18.04 with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.9.0, transformers 4.18.0. +# Warning: Using newer version (1.10 or 1.11) of PyTorch might encounter issues in exporting, but they are fine for benchmarking. +# +# Example commands for exporting longformer base model in Linux: +# cd ./torch_extensions +# python setup.py install +# cd .. +# python convert_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx +# # GPU is not needed for this script. You can run it in CPU. For --optimize_onnx, you can use either onnxruntime or onnxruntime-gpu package. # -# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or above. +# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or newer version. 
import sys import os @@ -27,7 +35,7 @@ from packaging import version from pathlib import Path from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) from torch_onnx_export_helper import torch_onnx_export @@ -48,9 +56,9 @@ def my_longformer_attention(g, input, weight, bias, mask, global_weight, global_ # namespace is onnxruntime which is registered in longformer_attention.cpp register_custom_op_symbolic('onnxruntime::LongformerAttention', my_longformer_attention, 9) -# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.6 +# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.8 torch.ops.load_library( - r'../torch_extensions/build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so') + r'./torch_extensions/build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so') def parse_arguments(): @@ -231,7 +239,7 @@ def export_longformer(model, onnx_model_path, export_padding): torch_onnx_export(model, example_inputs, onnx_model_path, - opset_version=11, + opset_version=12, input_names=["input_ids", "attention_mask", "global_attention_mask"], output_names=["last_state", "pooler"], dynamic_axes={ diff --git a/onnxruntime/python/tools/transformers/longformer/generate_test_data.py b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py similarity index 99% rename from onnxruntime/python/tools/transformers/longformer/generate_test_data.py rename to onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py index 857f530b43..f0f185b8ed 100644 --- a/onnxruntime/python/tools/transformers/longformer/generate_test_data.py +++ b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py @@ -13,7 +13,7 
@@ import random from pathlib import Path from onnx import ModelProto, TensorProto, numpy_helper -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) from onnx_model import OnnxModel from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data diff --git a/onnxruntime/python/tools/transformers/longformer/longformer_helper.py b/onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py similarity index 100% rename from onnxruntime/python/tools/transformers/longformer/longformer_helper.py rename to onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py diff --git a/onnxruntime/python/tools/transformers/torch_extensions/longformer_attention.cpp b/onnxruntime/python/tools/transformers/models/longformer/torch_extensions/longformer_attention.cpp similarity index 100% rename from onnxruntime/python/tools/transformers/torch_extensions/longformer_attention.cpp rename to onnxruntime/python/tools/transformers/models/longformer/torch_extensions/longformer_attention.cpp diff --git a/onnxruntime/python/tools/transformers/torch_extensions/setup.py b/onnxruntime/python/tools/transformers/models/longformer/torch_extensions/setup.py similarity index 100% rename from onnxruntime/python/tools/transformers/torch_extensions/setup.py rename to onnxruntime/python/tools/transformers/models/longformer/torch_extensions/setup.py diff --git a/setup.py b/setup.py index 744ccd7031..62086b1419 100644 --- a/setup.py +++ b/setup.py @@ -319,7 +319,8 @@ packages = [ 'onnxruntime.quantization.operators', 'onnxruntime.quantization.CalTableFlatBuffers', 'onnxruntime.transformers', - 'onnxruntime.transformers.longformer', + 'onnxruntime.transformers.models.t5', + 'onnxruntime.transformers.models.longformer', ] requirements_file = "requirements.txt"