From ae22bdaefe2a0d1cc8d997d87ba5d88b2d8eafd1 Mon Sep 17 00:00:00 2001 From: James Wu Date: Sun, 10 Mar 2024 20:53:32 -0700 Subject: [PATCH] Update torchbench commit pin, add sam_fast benchmark (#121420) After this, the sam_fast benchmark can now be run in the pytorch repo: ``` SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 benchmarks/dynamo/torchbench.py --inference --amp --performance --backend=inductor --explain --only sam_fast ``` sam_fast is designed for inference only, with cuda and amp on. The code adds these restrictions to the benchmark. Pull Request resolved: https://github.com/pytorch/pytorch/pull/121420 Approved by: https://github.com/oulgen, https://github.com/msaroufim --- .github/ci_commit_pins/torchbench.txt | 2 +- .../aot_eager_torchbench_inference.csv | 4 ++++ .../aot_inductor_torchbench_inference.csv | 4 ++++ .../dynamo_eager_torchbench_inference.csv | 4 ++++ .../ci_expected_accuracy/inductor_torchbench_inference.csv | 4 ++++ benchmarks/dynamo/common.py | 1 + benchmarks/dynamo/torchbench.py | 7 +++++-- benchmarks/dynamo/torchbench.yaml | 4 ++++ 8 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt index 22e3bc7d040..3df9dd6cf80 100644 --- a/.github/ci_commit_pins/torchbench.txt +++ b/.github/ci_commit_pins/torchbench.txt @@ -1 +1 @@ -1ef0a39e13872e4cf810c430ae4c87e46938f2ba +d6015d42d9a1834bc7595c4bd6852562fb80b30b diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv index 3267cca071c..09b5510daac 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv @@ -310,6 +310,10 @@ sam,pass,0 +sam_fast,pass,0 + + + shufflenet_v2_x1_0,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv index ce759f92460..b3bb399f754 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv @@ -282,6 +282,10 @@ sam,fail_to_run,0 +sam_fast,fail_to_run,0 + + + shufflenet_v2_x1_0,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv index 3267cca071c..09b5510daac 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv @@ -310,6 +310,10 @@ sam,pass,0 +sam_fast,pass,0 + + + shufflenet_v2_x1_0,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv index 928ddfb5592..bd93461957a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv @@ -310,6 +310,10 @@ sam,pass,0 +sam_fast,pass,0 + + + shufflenet_v2_x1_0,pass,0 diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index f6076cd5668..fd3fd7e6c9b 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -3483,6 +3483,7 @@ def run(runner, args, original_dir=None): "Wav2Vec2ForCTC", "Wav2Vec2ForPreTraining", "sam", + "sam_fast", "resnet50_quantized_qat", "mobilenet_v2_quantized_qat", }: diff --git a/benchmarks/dynamo/torchbench.py b/benchmarks/dynamo/torchbench.py index 1537def8570..6dc6dd22ee6 100755 --- a/benchmarks/dynamo/torchbench.py +++ b/benchmarks/dynamo/torchbench.py @@ -229,6 +229,11 @@ class TorchBenchmarkRunner(BenchmarkRunner): if part: extra_args += ["--part", part] + # sam_fast only runs with amp + if model_name == "sam_fast": + self.args.amp = True + self.setup_amp() + if model_name == "vision_maskrcnn" and is_training: # Output of vision_maskrcnn model is a list of bounding boxes, # sorted on the basis of their scores. This makes accuracy @@ -259,7 +264,6 @@ class TorchBenchmarkRunner(BenchmarkRunner): extra_args=extra_args, ) model, example_inputs = benchmark.get_module() - # Models that must be in train mode while training if is_training and ( not use_eval_mode or model_name in self._config["only_training"] @@ -269,7 +273,6 @@ class TorchBenchmarkRunner(BenchmarkRunner): model.eval() gc.collect() batch_size = benchmark.batch_size - # Torchbench has quite different setup for yolov3, so directly passing # the right example_inputs if model_name == "yolov3": diff --git a/benchmarks/dynamo/torchbench.yaml b/benchmarks/dynamo/torchbench.yaml index 240881bb126..d0ee3b95291 100644 --- a/benchmarks/dynamo/torchbench.yaml +++ b/benchmarks/dynamo/torchbench.yaml @@ -94,6 +94,7 @@ slow: non_deterministic: # https://github.com/pytorch/pytorch/issues/98355 - mobilenet_v3_large + - sam_fast dtype: @@ -173,6 +174,8 @@ skip: # timeout - sam # model is CUDA only + - sam_fast + # model is CUDA only - llama_v2_7b_16h # flaky - stable_diffusion @@ -197,6 +200,7 @@ skip: - llama - llama_v2_7b_16h - simple_gpt + - sam_fast # Model's DEFAULT_TRAIN_BSIZE is not implemented - cm3leon_generate - hf_T5_generate