move longformer and t5 to models subdirectory (#11161)

* move longformer scripts to models subdirectory
* Copy transformers\models\t5 to python package as well
This commit is contained in:
Tianlei Wu 2022-04-09 22:35:14 -07:00 committed by GitHub
parent f24523e0eb
commit 00b595e389
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 33 deletions

View file

@ -364,8 +364,11 @@ file(GLOB onnxruntime_python_quantization_cal_table_flatbuffers_src CONFIGURE_DE
file(GLOB onnxruntime_python_transformers_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/*.py"
)
file(GLOB onnxruntime_python_transformers_longformer_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/longformer/*.py"
file(GLOB onnxruntime_python_transformers_models_longformer_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/longformer/*.py"
)
file(GLOB onnxruntime_python_transformers_models_t5_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/t5/*.py"
)
file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/datasets/*.py"
@ -416,7 +419,9 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/tools/ort_format_model/ort_flatbuffers_py
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/operators
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/quantization/CalTableFlatBuffers
@ -497,8 +502,11 @@ add_custom_command(
${onnxruntime_python_transformers_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_longformer_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/longformer/
${onnxruntime_python_transformers_models_longformer_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/longformer/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_models_t5_src}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/transformers/models/t5/
COMMAND ${CMAKE_COMMAND} -E copy
${REPO_ROOT}/VERSION_NUMBER
$<TARGET_FILE_DIR:${build_output_target}>

View file

@ -38,22 +38,24 @@ The tool will also verify whether the ONNX model and corresponding PyTorch model
### Longformer Model conversion
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment like the following:
Requirement: Linux OS (For example Ubuntu 18.04 or 20.04) and a python environment with PyTorch 1.9.* like the following:
```
conda create -n longformer python=3.6
conda create -n longformer python=3.8
conda activate longformer
conda install pytorch torchvision torchaudio cpuonly -c pytorch
pip install onnx transformers onnxruntime
pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
pip install onnx transformers==4.18.0 onnxruntime numpy
```
Next, get the source of [torch extensions for Longformer exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions), and run the following:
Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions) like the following:
```
cd onnxruntime/python/tools/transformers/models/longformer/torch_extensions
python setup.py install
```
It will generate file like "build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so" under the directory.
It will generate a PyTorch extension file like "build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so" under the directory.
Finally, use [convert_longformer_to_onnx](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py) to convert to ONNX model like the following:
Finally, convert longformer model to ONNX model like the following:
```
python convert_longformer_to_onnx.py -m longformer-base-4096
cd ..
python convert_to_onnx.py -m longformer-base-4096
```
The exported ONNX model can only run on GPU right now.

View file

@ -5,19 +5,17 @@
# --------------------------------------------------------------------------
#
# This script runs a benchmark of latency or peak memory usage of Longformer model inference.
# Please run convert_to_onnx.py to get onnx model before running benchmark.
#
# Please run convert_longformer_to_onnx.py to get onnx model before running this script.
# Tested with python 3.6, onnxruntime-gpu 1.7.0, PyTorch 1.7.1, transformers 4.3.2, CUDA 10.2.
#
# Example commands for exporting longformer base model in Linux or WSL:
# cd ../torch_extensions
# python setup.py install
# cd ../longformer
# python convert_longformer_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
# It is tested with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.11.0, transformers 4.18.0, CUDA 11.3 like the following
# conda create -n gpu_env python=3.8
# conda activate gpu_env
# pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
# pip3 install onnx transformers onnxruntime-gpu numpy sympy coloredlogs psutil py3nvml
# python benchmark_longformer.py
#
# When there is no parameter, all available tests (memory & latency) will run on the longformer-base-4096 pretrained model.
# python benchmark_longformer.py
#
# Benchmark the latency (Exported onnx model is in the current directory):
# python benchmark_longformer.py --model longformer-base-4096 --batch_sizes 1 --sequence_lengths 512 1024 2048 4096 --global_lengths 8 --onnx ./longformer-base-4096_fp16.onnx --validate_onnx -t 100
#
@ -41,7 +39,7 @@ import math
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
import benchmark_helper

View file

@ -7,13 +7,21 @@
# This script converts Longformer model from huggingface transformers 4.0 or later to ONNX.
# Unlike normal ONNX model exporting, it will directly translate LongformerSelfAttention to the LongformerAttention operator in ONNX Runtime.
#
# Before running this script, please run "python setup.py install" in ../torch_extensions under Linux with PyTorch installed.
# Before running this script, please run "python setup.py install" in ./torch_extensions under Linux with PyTorch installed.
# Then you can update the path of longformer_attention.cpython-*.so and run this script in same environment.
#
# It is tested in Ubuntu 18.04, python 3.6, PyTorch 1.7.1, transformers 4.3.0 or 4.3.2.
# It is tested in Ubuntu 18.04 with python 3.8, onnxruntime-gpu 1.11.0, PyTorch 1.9.0, transformers 4.18.0.
# Warning: Using a newer version (1.10 or 1.11) of PyTorch might encounter issues in exporting, but they are fine for benchmarking.
#
# Example commands for exporting longformer base model in Linux:
# cd ./torch_extensions
# python setup.py install
# cd ..
# python convert_to_onnx.py --model longformer-base-4096 --precision fp16 --optimize_onnx
#
# GPU is not needed for this script. You can run it in CPU. For --optimize_onnx, you can use either onnxruntime or onnxruntime-gpu package.
#
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or above.
# For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or newer version.
import sys
import os
@ -27,7 +35,7 @@ from packaging import version
from pathlib import Path
from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from torch_onnx_export_helper import torch_onnx_export
@ -48,9 +56,9 @@ def my_longformer_attention(g, input, weight, bias, mask, global_weight, global_
# namespace is onnxruntime which is registered in longformer_attention.cpp
register_custom_op_symbolic('onnxruntime::LongformerAttention', my_longformer_attention, 9)
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.6
# TODO: search the directory to find correct output filename of "python setup.py install" when python version is not 3.8
torch.ops.load_library(
r'../torch_extensions/build/lib.linux-x86_64-3.6/longformer_attention.cpython-36m-x86_64-linux-gnu.so')
r'./torch_extensions/build/lib.linux-x86_64-3.8/longformer_attention.cpython-38-x86_64-linux-gnu.so')
def parse_arguments():
@ -231,7 +239,7 @@ def export_longformer(model, onnx_model_path, export_padding):
torch_onnx_export(model,
example_inputs,
onnx_model_path,
opset_version=11,
opset_version=12,
input_names=["input_ids", "attention_mask", "global_attention_mask"],
output_names=["last_state", "pooler"],
dynamic_axes={

View file

@ -13,7 +13,7 @@ import random
from pathlib import Path
from onnx import ModelProto, TensorProto, numpy_helper
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from onnx_model import OnnxModel
from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data

View file

@ -319,7 +319,8 @@ packages = [
'onnxruntime.quantization.operators',
'onnxruntime.quantization.CalTableFlatBuffers',
'onnxruntime.transformers',
'onnxruntime.transformers.longformer',
'onnxruntime.transformers.models.t5',
'onnxruntime.transformers.models.longformer',
]
requirements_file = "requirements.txt"