From ae22bdaefe2a0d1cc8d997d87ba5d88b2d8eafd1 Mon Sep 17 00:00:00 2001
From: James Wu <jjwu@meta.com>
Date: Sun, 10 Mar 2024 20:53:32 -0700
Subject: [PATCH] Update torchbench commit pin, add sam_fast benchmark
 (#121420)

After this change, the sam_fast benchmark can be run from the pytorch repo:
```
SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 benchmarks/dynamo/torchbench.py --inference --amp --performance --backend=inductor --explain --only sam_fast
```

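sam_fast is designed for inference only, with CUDA and AMP enabled. This change enforces those restrictions in the benchmark harness: torchbench.py forces AMP on whenever sam_fast is loaded, and torchbench.yaml adds sam_fast to the corresponding skip lists.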

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121420
Approved by: https://github.com/oulgen, https://github.com/msaroufim
---
 .github/ci_commit_pins/torchbench.txt                      | 2 +-
 .../aot_eager_torchbench_inference.csv                     | 4 ++++
 .../aot_inductor_torchbench_inference.csv                  | 4 ++++
 .../dynamo_eager_torchbench_inference.csv                  | 4 ++++
 .../ci_expected_accuracy/inductor_torchbench_inference.csv | 4 ++++
 benchmarks/dynamo/common.py                                | 1 +
 benchmarks/dynamo/torchbench.py                            | 7 +++++--
 benchmarks/dynamo/torchbench.yaml                          | 4 ++++
 8 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt
index 22e3bc7d040..3df9dd6cf80 100644
--- a/.github/ci_commit_pins/torchbench.txt
+++ b/.github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
-1ef0a39e13872e4cf810c430ae4c87e46938f2ba
+d6015d42d9a1834bc7595c4bd6852562fb80b30b
diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
index 3267cca071c..09b5510daac 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
@@ -310,6 +310,10 @@ sam,pass,0
 
 
 
+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0
 
 
diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv
index ce759f92460..b3bb399f754 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv
@@ -282,6 +282,10 @@ sam,fail_to_run,0
 
 
 
+sam_fast,fail_to_run,0
+
+
+
 shufflenet_v2_x1_0,pass,0
 
 
diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
index 3267cca071c..09b5510daac 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
@@ -310,6 +310,10 @@ sam,pass,0
 
 
 
+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0
 
 
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
index 928ddfb5592..bd93461957a 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
@@ -310,6 +310,10 @@ sam,pass,0
 
 
 
+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0
 
 
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index f6076cd5668..fd3fd7e6c9b 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -3483,6 +3483,7 @@ def run(runner, args, original_dir=None):
             "Wav2Vec2ForCTC",
             "Wav2Vec2ForPreTraining",
             "sam",
+            "sam_fast",
             "resnet50_quantized_qat",
             "mobilenet_v2_quantized_qat",
         }:
diff --git a/benchmarks/dynamo/torchbench.py b/benchmarks/dynamo/torchbench.py
index 1537def8570..6dc6dd22ee6 100755
--- a/benchmarks/dynamo/torchbench.py
+++ b/benchmarks/dynamo/torchbench.py
@@ -229,6 +229,11 @@ class TorchBenchmarkRunner(BenchmarkRunner):
         if part:
             extra_args += ["--part", part]
 
+        # sam_fast only runs with amp
+        if model_name == "sam_fast":
+            self.args.amp = True
+            self.setup_amp()
+
         if model_name == "vision_maskrcnn" and is_training:
             # Output of vision_maskrcnn model is a list of bounding boxes,
             # sorted on the basis of their scores. This makes accuracy
@@ -259,7 +264,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
                 extra_args=extra_args,
             )
         model, example_inputs = benchmark.get_module()
-
         # Models that must be in train mode while training
         if is_training and (
             not use_eval_mode or model_name in self._config["only_training"]
@@ -269,7 +273,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
             model.eval()
         gc.collect()
         batch_size = benchmark.batch_size
-
         # Torchbench has quite different setup for yolov3, so directly passing
         # the right example_inputs
         if model_name == "yolov3":
diff --git a/benchmarks/dynamo/torchbench.yaml b/benchmarks/dynamo/torchbench.yaml
index 240881bb126..d0ee3b95291 100644
--- a/benchmarks/dynamo/torchbench.yaml
+++ b/benchmarks/dynamo/torchbench.yaml
@@ -94,6 +94,7 @@ slow:
 non_deterministic:
   # https://github.com/pytorch/pytorch/issues/98355
   - mobilenet_v3_large
+  - sam_fast
 
 
 dtype:
@@ -173,6 +174,8 @@ skip:
       # timeout
       - sam
       # model is CUDA only
+      - sam_fast
+      # model is CUDA only
       - llama_v2_7b_16h
       # flaky
       - stable_diffusion
@@ -197,6 +200,7 @@ skip:
       - llama
       - llama_v2_7b_16h
       - simple_gpt
+      - sam_fast
       # Model's DEFAULT_TRAIN_BSIZE is not implemented
       - cm3leon_generate
       - hf_T5_generate