onnxruntime/onnxruntime/python/tools/transformers/dev_benchmark.cmd
Tianlei Wu 975d3dffcf
Update bert benchmark: replace deprecated API (#22611)
### Description
(1) tokenizer.max_model_input_sizes was deprecated. Use
tokenizer.model_max_length to replace it.
(2) onnx opset updated to 16 instead of 11/12 for models.
(3) Update a few comments related to torch installation.
(4) Test gpu instead of cpu in dev_benchmark.cmd.

### Motivation and Context
Update bert benchmark script so that it can run with latest huggingface
transformers package.
2024-10-28 13:24:17 -07:00

162 lines
5.4 KiB
Batchfile

@echo off
REM Run benchmark in Windows for developing purpose. For official benchmark, please use run_benchmark.sh.
REM Settings are different from run_benchmark.sh: no cli, batch and sequence, input counts, average over 100, no fp16, less models etc.
REM NOTE(review): the line above mentions average over 100 and no fp16, but the values below are
REM average_over=1000 and run_gpu_fp16=true. Confirm which one is intended.
REM Please install PyTorch (see https://pytorch.org/) before running this benchmark.

REM When use_package=true, you do not need to copy any file other than this cmd file to run benchmarks.
REM Otherwise, it will use python script (*.py) files in this directory.
set use_package=false

REM Installing packages is only needed once.
set run_install=false

REM Engines to test
set run_ort=true
set run_torch=false
set run_torchscript=false

REM Devices to test.
REM Attention: you cannot test CPU and GPU in the same run. GPU needs the onnxruntime-gpu package, and CPU needs the onnxruntime package.
set run_gpu_fp32=true
set run_gpu_fp16=true
set run_cpu_fp32=false
set run_cpu_int8=false

REM Value passed to the benchmark tool with the -t option.
set average_over=1000

REM Enable optimizer (use script instead of OnnxRuntime for graph optimization)
set use_optimizer=true

set batch_sizes=1
set sequence_length=8 128

REM Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model.
REM Note that different input count might lead to different performance
set input_counts=3

REM Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
set models_to_test=bert-base-cased

REM If you have multiple GPUs, you can choose one GPU for test. Here is an example to use the second GPU:
REM set CUDA_VISIBLE_DEVICES=1

REM This script generates a log file named benchmark.log with the list of commands used in tests.
REM The first line records the settings. It starts a new log file, so any previous log is overwritten.
>benchmark.log echo echo ort=%run_ort% torch=%run_torch% torchscript=%run_torchscript% gpu_fp32=%run_gpu_fp32% gpu_fp16=%run_gpu_fp16% cpu_fp32=%run_cpu_fp32% cpu_int8=%run_cpu_int8% optimizer=%use_optimizer% batch="%batch_sizes%" sequence="%sequence_length%" models="%models_to_test%" input_counts="%input_counts%"

REM Set it to false to skip testing. You can use it to dry run this script with the benchmark.log file.
set run_tests=true
REM -------------------------------------------
REM Stop early when a CPU test and a GPU test are both enabled.
if %run_cpu_fp32% == true (
    if %run_gpu_fp32% == true echo cannot test cpu and gpu at same time & goto :EOF
    if %run_gpu_fp16% == true echo cannot test cpu and gpu at same time & goto :EOF
)
if %run_cpu_int8% == true (
    if %run_gpu_fp32% == true echo cannot test cpu and gpu at same time & goto :EOF
    if %run_gpu_fp16% == true echo cannot test cpu and gpu at same time & goto :EOF
)
REM Optional one-time setup: remove both onnxruntime flavors, then install the one
REM that matches the selected device. Either CPU test, fp32 or int8, uses the CPU
REM package. Otherwise the GPU package is installed. Transformers is upgraded last.
if %run_install% == true (
    pip uninstall --yes onnxruntime
    pip uninstall --yes onnxruntime-gpu
    if %run_cpu_fp32% == true (
        pip install onnxruntime
    ) else (
        if %run_cpu_int8% == true (
            pip install onnxruntime
        ) else (
            pip install --upgrade onnxruntime-gpu
        )
    )
    pip install --upgrade transformers
)
REM Choose how the benchmark tool is started: the local benchmark.py by default,
REM or the module from the installed package when use_package is true.
set optimizer_script=benchmark.py
if %use_package% == true (
    echo Use onnxruntime.transformers.benchmark
    set optimizer_script=-m onnxruntime.transformers.benchmark
)

REM remove --overwrite can save some time if you did not update any of these: transformers, PyTorch or fusion logic in optimizer.
set onnx_export_options=-i %input_counts% -v -b 0 -f fusion.csv --overwrite
set benchmark_options=-b %batch_sizes% -s %sequence_length% -t %average_over% -f fusion.csv -r result.csv -d detail.csv

REM Append the graph optimization mode to both option strings.
if %use_optimizer% == true set onnx_export_options=%onnx_export_options% -o by_script
if %use_optimizer% == true set benchmark_options=%benchmark_options% -o by_script
if not %use_optimizer% == true set onnx_export_options=%onnx_export_options% -o by_ort
if not %use_optimizer% == true set benchmark_options=%benchmark_options% -o by_ort
REM For every enabled device and precision, call RunOneTest once per model.
REM The flags after the model name are forwarded to the benchmark tool.

REM GPU with fp32
if %run_gpu_fp32% == true for %%m in (%models_to_test%) do (
    echo Run GPU FP32 Benchmark on model %%m
    call :RunOneTest %%m -g
)

REM GPU with fp16
if %run_gpu_fp16% == true for %%m in (%models_to_test%) do (
    echo Run GPU FP16 Benchmark on model %%m
    call :RunOneTest %%m -g -p fp16
)

REM CPU with fp32
if %run_cpu_fp32% == true for %%m in (%models_to_test%) do (
    echo Run CPU FP32 Benchmark on model %%m
    call :RunOneTest %%m
)

REM CPU with int8
if %run_cpu_int8% == true for %%m in (%models_to_test%) do (
    echo Run CPU Int8 Benchmark on model %%m
    call :RunOneTest %%m -p int8
)
REM In a dry run, display the commands that were logged instead of executed.
if %run_tests%==false more benchmark.log

REM Post-process each CSV file named in the benchmark options.
for %%f in (result.csv fusion.csv detail.csv) do call :RemoveDuplicateLines %%f

echo Done!
goto :EOF
REM -----------------------------
REM Subroutine RunOneTest: log, and optionally run, the benchmark commands for one model.
REM   Argument 1      : model name, passed to the tool after -m.
REM   Arguments 2 to 4: extra flags forwarded unchanged, for example -g or -p fp16.
REM Every command is appended to benchmark.log. Commands are executed only when run_tests is true.
:RunOneTest
if not %run_ort% == true goto :OneTestTorch
REM OnnxRuntime engine: one command with onnx_export_options, then one with benchmark_options.
>>benchmark.log echo python %optimizer_script% -m %1 %onnx_export_options% %2 %3 %4
>>benchmark.log echo python %optimizer_script% -m %1 %benchmark_options% %2 %3 %4 -i %input_counts%
if %run_tests%==true python %optimizer_script% -m %1 %onnx_export_options% %2 %3 %4
if %run_tests%==true python %optimizer_script% -m %1 %benchmark_options% %2 %3 %4 -i %input_counts%

:OneTestTorch
if not %run_torch% == true goto :OneTestScript
REM PyTorch engine
>>benchmark.log echo python %optimizer_script% -e torch -m %1 %benchmark_options% %2 %3 %4
if %run_tests%==true python %optimizer_script% -e torch -m %1 %benchmark_options% %2 %3 %4

:OneTestScript
if not %run_torchscript% == true goto :EOF
REM TorchScript engine
>>benchmark.log echo python %optimizer_script% -e torchscript -m %1 %benchmark_options% %2 %3 %4
if %run_tests%==true python %optimizer_script% -e torchscript -m %1 %benchmark_options% %2 %3 %4
goto :EOF
REM -----------------------------
REM Subroutine RemoveDuplicateLines: write a deduplicated copy of a CSV file.
REM   Argument 1: name of the CSV file. The output file has the same name prefixed with summary_.
REM The first line is kept as the header and the remaining unique lines are sorted.
REM Lines that are empty or contain only commas are dropped from the output.
REM Nothing is done when the file is missing or smaller than 10 bytes.
:RemoveDuplicateLines
REM A missing file yields an empty size, which would turn the comparison below into a syntax error.
if not exist "%~1" goto :EOF
SET FileSize=%~z1
IF %FileSize% LSS 10 goto :EOF
python -c "import sys; lines=sys.stdin.readlines(); h=lines[0]; print(''.join([h]+list(sorted(set(lines)-set([h])))))" < %1 > sort_%1
REM Keep only lines that have at least one character other than a comma.
FindStr "[^,]" sort_%1 > summary_%1
DEL sort_%1
goto :EOF