mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-05 04:17:53 +00:00
### Description This PR adds PyTorch 2.0 as an option when running the ORT transformer benchmarking script. ### Motivation and Context PyTorch released [PyTorch 2.0](https://pytorch.org/get-started/pytorch-2.0/) in the nightly binaries and a stable release of PyTorch 2.0 is expected in March 2023.
226 lines
8 KiB
Bash
226 lines
8 KiB
Bash
# -------------------------------------------------------------------------
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License. See License.txt in the project root for
|
|
# license information.
|
|
# --------------------------------------------------------------------------
|
|
# This measures the performance of OnnxRuntime, PyTorch and TorchScript on transformer models.
|
|
# Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following:
|
|
# GPU: conda install pytorch torchvision cudatoolkit=11.0 -c pytorch
|
|
# CPU: conda install pytorch torchvision cpuonly -c pytorch
|
|
# To use torch2, please install the nightly PyTorch by replacing pytorch with pytorch-nightly.
|
|
|
|
# When use_package=true, you need not copy other files to run benchmarks except this sh file.
|
|
# Otherwise, it will use python script (*.py) files in this directory.
|
|
use_package=true
|
|
|
|
# only need once
|
|
run_install=true
|
|
|
|
# Engines to test.
|
|
# To run ort_trt, you need to build and install the onnxruntime-gpu-tensorrt package on your own
|
|
run_ort=true
|
|
run_ort_trt=false
|
|
run_torch=false
|
|
run_torch2=false
|
|
run_torchscript=true
|
|
run_tensorflow=false
|
|
|
|
# Onnx model source (default is from pytorch, set export_onnx_from_tf=true to convert from tensorflow model)
|
|
export_onnx_from_tf=false
|
|
|
|
# Devices to test (You can run either CPU or GPU, but not both: gpu need onnxruntime-gpu, and CPU need onnxruntime).
|
|
run_gpu_fp32=true
|
|
run_gpu_fp16=true
|
|
run_cpu_fp32=false
|
|
run_cpu_int8=false
|
|
|
|
average_over=1000
|
|
# CPU takes longer time to run, only run 100 inferences to get average latency.
|
|
if [ "$run_cpu_fp32" = true ] || [ "$run_cpu_int8" = true ]; then
|
|
average_over=100
|
|
fi
|
|
|
|
# Enable optimizer (use script instead of OnnxRuntime for graph optimization)
|
|
use_optimizer=true
|
|
|
|
# Manually set layer number as needed(e.g 16)
|
|
force_layer_number=false
|
|
layer_number=16
|
|
|
|
# Batch Sizes and Sequence Lengths
|
|
batch_sizes="1 4"
|
|
sequence_lengths="8 16 32 64 128 256 512 1024"
|
|
|
|
# Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model.
|
|
# Not that different input count might lead to different performance
|
|
# Here we only test one input (input_ids) for fair comparison with PyTorch.
|
|
input_counts=1
|
|
|
|
# Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
|
|
models_to_test="bert-base-cased roberta-base distilbert-base-uncased"
|
|
|
|
# If you have mutliple GPUs, you can choose one GPU for test. Here is an example to use the second GPU:
|
|
# export CUDA_VISIBLE_DEVICES=1
|
|
|
|
# This script will generate a logs file with a list of commands used in tests.
|
|
echo echo "ort=$run_ort torch=$run_torch torch2=$run_torch2 torchscript=$run_torchscript tensorflow=$run_tensorflow gpu_fp32=$run_gpu_fp32 gpu_fp16=$run_gpu_fp16 cpu=$run_cpu optimizer=$use_optimizer batch=$batch_sizes sequence=$sequence_length models=$models_to_test" >> benchmark.log
|
|
|
|
# Set it to false to skip testing. You can use it to dry run this script with the log file.
|
|
run_tests=true
|
|
|
|
# Directory for downloading pretrained models.
|
|
cache_dir="./cache_models"
|
|
|
|
# Directory for ONNX models
|
|
onnx_dir="./onnx_models"
|
|
|
|
# -------------------------------------------
|
|
if [ "$run_cpu_fp32" = true ] || [ "$run_cpu_int8" = true ]; then
|
|
if [ "$run_gpu_fp32" = true ] ; then
|
|
echo "cannot test cpu and gpu at same time"
|
|
exit 1
|
|
fi
|
|
if [ "$run_gpu_fp16" = true ] ; then
|
|
echo "cannot test cpu and gpu at same time"
|
|
exit 1
|
|
fi
|
|
fi
|
|
|
|
|
|
if [ "$run_install" = true ] ; then
|
|
pip uninstall --yes ort-nightly ort-gpu-nightly
|
|
pip uninstall --yes onnxruntime
|
|
pip uninstall --yes onnxruntime-gpu
|
|
if [ "$run_cpu_fp32" = true ] || [ "$run_cpu_int8" = true ]; then
|
|
pip install onnxruntime
|
|
else
|
|
pip install onnxruntime-gpu
|
|
fi
|
|
pip install --upgrade onnx coloredlogs packaging psutil py3nvml onnxconverter_common numpy transformers sympy
|
|
fi
|
|
|
|
if [ "$use_package" = true ] ; then
|
|
echo "Use onnxruntime.transformers.benchmark"
|
|
benchmark_script="-m onnxruntime.transformers.benchmark"
|
|
else
|
|
benchmark_script="benchmark.py"
|
|
fi
|
|
|
|
onnx_export_options="-i $input_counts -v -b 0 --overwrite -f fusion.csv -c $cache_dir --onnx_dir $onnx_dir"
|
|
benchmark_options="-b $batch_sizes -s $sequence_lengths -t $average_over -f fusion.csv -r result.csv -d detail.csv -c $cache_dir --onnx_dir $onnx_dir"
|
|
|
|
if [ "$export_onnx_from_tf" = true ] ; then
|
|
onnx_export_options="$onnx_export_options --model_source tf"
|
|
benchmark_options="$benchmark_options --model_source tf"
|
|
fi
|
|
|
|
if [ "$use_optimizer" = true ] ; then
|
|
onnx_export_options="$onnx_export_options -o by_script"
|
|
benchmark_options="$benchmark_options -o by_script"
|
|
else
|
|
onnx_export_options="$onnx_export_options -o by_ort"
|
|
benchmark_options="$benchmark_options -o by_ort"
|
|
fi
|
|
|
|
if [ "$force_layer_number" = true ] ; then
|
|
onnx_export_options="$onnx_export_options --force_num_layers $layer_number"
|
|
benchmark_options="$benchmark_options --force_num_layers $layer_number"
|
|
fi
|
|
|
|
# -------------------------------------------
|
|
run_one_test() {
|
|
if [ "$run_ort" = true ] ; then
|
|
echo python $benchmark_script -m $1 $onnx_export_options $2 $3 $4 >> benchmark.log
|
|
echo python $benchmark_script -m $1 $benchmark_options $2 $3 $4 -i $input_counts >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -m $1 $onnx_export_options $2 $3 $4
|
|
python $benchmark_script -m $1 $benchmark_options $2 $3 $4 -i $input_counts
|
|
fi
|
|
fi
|
|
|
|
if [ "$run_ort_trt" = true ] ; then
|
|
trt_options="--provider tensorrt"
|
|
echo python $benchmark_script -m $1 $onnx_export_options $trt_options $2 $3 $4 >> benchmark.log
|
|
echo python $benchmark_script -m $1 $benchmark_options $trt_options $2 $3 $4 -i $input_counts >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -m $1 $onnx_export_options $trt_options $2 $3 $4
|
|
python $benchmark_script -m $1 $benchmark_options $trt_options $2 $3 $4 -i $input_counts
|
|
fi
|
|
fi
|
|
|
|
if [ "$run_torch" = true ] ; then
|
|
echo python $benchmark_script -e torch -m $1 $benchmark_options $2 $3 $4 >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -e torch -m $1 $benchmark_options $2 $3 $4
|
|
fi
|
|
fi
|
|
|
|
if [ "$run_torch2" = true ] ; then
|
|
echo python $benchmark_script -e torch2 -m $1 $benchmark_options $2 $3 $4 >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -e torch2 -m $1 $benchmark_options $2 $3 $4
|
|
fi
|
|
fi
|
|
|
|
if [ "$run_torchscript" = true ] ; then
|
|
echo python $benchmark_script -e torchscript -m $1 $benchmark_options $2 $3 $4 >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -e torchscript -m $1 $benchmark_options $2 $3 $4
|
|
fi
|
|
fi
|
|
|
|
if [ "$run_tensorflow" = true ] ; then
|
|
echo python $benchmark_script -e tensorflow -m $1 $benchmark_options $2 $3 $4 >> benchmark.log
|
|
if [ "$run_tests" = true ] ; then
|
|
python $benchmark_script -e tensorflow -m $1 $benchmark_options $2 $3 $4
|
|
fi
|
|
fi
|
|
}
|
|
|
|
# -------------------------------------------
|
|
if [ "$run_gpu_fp32" = true ] ; then
|
|
if [ "$run_ort_trt" = true ] ; then
|
|
export ORT_TENSORRT_FP16_ENABLE=0
|
|
fi
|
|
for m in $models_to_test
|
|
do
|
|
echo Run GPU FP32 Benchmark on model ${m}
|
|
run_one_test "${m}" -g
|
|
done
|
|
fi
|
|
|
|
if [ "$run_gpu_fp16" = true ] ; then
|
|
if [ "$run_ort_trt" = true ] ; then
|
|
export ORT_TENSORRT_FP16_ENABLE=1
|
|
fi
|
|
for m in $models_to_test
|
|
do
|
|
echo Run GPU FP16 Benchmark on model ${m}
|
|
run_one_test "${m}" -g -p fp16
|
|
done
|
|
fi
|
|
|
|
if [ "$run_cpu_fp32" = true ] ; then
|
|
for m in $models_to_test
|
|
do
|
|
echo Run CPU Benchmark on model ${m}
|
|
run_one_test "${m}"
|
|
done
|
|
fi
|
|
|
|
if [ "$run_cpu_int8" = true ] ; then
|
|
for m in $models_to_test
|
|
do
|
|
echo Run CPU Benchmark on model ${m}
|
|
run_one_test "${m}" -p int8
|
|
done
|
|
fi
|
|
|
|
if [ "run_tests" = false ] ; then
|
|
more $log_file
|
|
fi
|
|
|
|
# Remove duplicated lines
|
|
awk '!x[$0]++' ./result.csv > summary_result.csv
|
|
awk '!x[$0]++' ./fusion.csv > summary_fusion.csv
|
|
awk '!x[$0]++' ./detail.csv > summary_detail.csv
|