diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index e3062d6ff7..23f1be3eee 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -71,6 +71,7 @@ from onnx_exporter import ( export_onnx_model_from_tf, load_pretrained_model, ) +from packaging import version from quantize_helper import QuantizeHelper logger = logging.getLogger("") @@ -312,6 +313,7 @@ def run_pytorch( sequence_lengths, repeat_times, torchscript, + torch2, cache_dir, verbose, ): @@ -366,13 +368,15 @@ def run_pytorch( device=device, ) try: - inference = torch.jit.trace(model, input_ids) if torchscript else model + inference = ( + torch.jit.trace(model, input_ids) if torchscript else torch.compile(model) if torch2 else model + ) inference(input_ids) runtimes = timeit.repeat(lambda: inference(input_ids), repeat=repeat_times, number=1) result = { - "engine": "torchscript" if torchscript else "torch", + "engine": "torchscript" if torchscript else "torch2" if torch2 else "torch", "version": torch.__version__, "providers": "NA", "device": "cuda" if use_gpu else "cpu", @@ -597,7 +601,7 @@ def parse_arguments(): nargs="+", type=str, default=["onnxruntime"], - choices=["onnxruntime", "torch", "torchscript", "tensorflow"], + choices=["onnxruntime", "torch", "torch2", "torchscript", "tensorflow"], help="Engines to benchmark", ) @@ -773,10 +777,15 @@ def main(): logger.error("Creation of the directory %s failed" % args.cache_dir) enable_torch = "torch" in args.engines + enable_torch2 = "torch2" in args.engines enable_torchscript = "torchscript" in args.engines enable_onnxruntime = "onnxruntime" in args.engines enable_tensorflow = "tensorflow" in args.engines + if enable_torch2 and version.parse(torch.__version__) < version.parse("2.0.0"): + logger.error(f"PyTorch version must be >=2.0.0 and you are using {torch.__version__}") + return + config_modifier = ConfigModifier(args.force_num_layers) results = [] @@ -784,7 +793,7 @@ def main(): for num_threads in args.num_threads: torch.set_num_threads(num_threads) logger.debug(torch.__config__.parallel_info()) - if enable_torch or enable_torchscript: + if enable_torch or enable_torch2 or enable_torchscript: if args.input_counts != [1]: logger.warning("--input_counts is not implemented for torch or torchscript engine.") @@ -800,6 +809,7 @@ def main(): args.sequence_lengths, args.test_times, True, + False, args.cache_dir, args.verbose, ) @@ -816,6 +826,24 @@ def main(): args.sequence_lengths, args.test_times, False, + False, + args.cache_dir, + args.verbose, + ) + + if enable_torch2: + results += run_pytorch( + args.use_gpu, + args.models, + args.model_class, + config_modifier, + args.precision, + num_threads, + args.batch_sizes, + args.sequence_lengths, + args.test_times, + False, + True, args.cache_dir, args.verbose, ) diff --git a/onnxruntime/python/tools/transformers/run_benchmark.sh b/onnxruntime/python/tools/transformers/run_benchmark.sh index 2191ff245b..f0422839c1 100644 --- a/onnxruntime/python/tools/transformers/run_benchmark.sh +++ b/onnxruntime/python/tools/transformers/run_benchmark.sh @@ -7,6 +7,7 @@ # Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following: # GPU: conda install pytorch torchvision cudatoolkit=11.0 -c pytorch # CPU: conda install pytorch torchvision cpuonly -c pytorch +# To use torch2, please install the nightly PyTorch by replacing pytorch with pytorch-nightly. # When use_package=true, you need not copy other files to run benchmarks except this sh file. # Otherwise, it will use python script (*.py) files in this directory. @@ -20,6 +21,7 @@ run_install=true run_ort=true run_ort_trt=false run_torch=false +run_torch2=false run_torchscript=true run_tensorflow=false @@ -61,7 +63,7 @@ models_to_test="bert-base-cased roberta-base distilbert-base-uncased" # export CUDA_VISIBLE_DEVICES=1 # This script will generate a logs file with a list of commands used in tests. -echo echo "ort=$run_ort torch=$run_torch torchscript=$run_torchscript tensorflow=$run_tensorflow gpu_fp32=$run_gpu_fp32 gpu_fp16=$run_gpu_fp16 cpu=$run_cpu optimizer=$use_optimizer batch=$batch_sizes sequence=$sequence_length models=$models_to_test" >> benchmark.log +echo echo "ort=$run_ort torch=$run_torch torch2=$run_torch2 torchscript=$run_torchscript tensorflow=$run_tensorflow gpu_fp32=$run_gpu_fp32 gpu_fp16=$run_gpu_fp16 cpu=$run_cpu optimizer=$use_optimizer batch=$batch_sizes sequence=$sequence_length models=$models_to_test" >> benchmark.log # Set it to false to skip testing. You can use it to dry run this script with the log file. run_tests=true @@ -153,6 +155,13 @@ run_one_test() { fi fi + if [ "$run_torch2" = true ] ; then + echo python $benchmark_script -e torch2 -m $1 $benchmark_options $2 $3 $4 >> benchmark.log + if [ "$run_tests" = true ] ; then + python $benchmark_script -e torch2 -m $1 $benchmark_options $2 $3 $4 + fi + fi + if [ "$run_torchscript" = true ] ; then echo python $benchmark_script -e torchscript -m $1 $benchmark_options $2 $3 $4 >> benchmark.log if [ "$run_tests" = true ] ; then