Update torchbench commit pin, add sam_fast benchmark (#121420)

After this change, the sam_fast benchmark can be run from the pytorch repo:

```
SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 benchmarks/dynamo/torchbench.py --inference --amp --performance --backend=inductor --explain --only sam_fast
```

sam_fast is designed for inference only, with CUDA and AMP enabled. This change adds these restrictions to the benchmark.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121420
Approved by: https://github.com/oulgen, https://github.com/msaroufim
2026-05-14 20:57:59 +00:00 · 2024-03-10 20:53:32 -07:00 · 2024-03-10 20:53:32 -07:00 · ae22bdaefe
commit ae22bdaefe
parent dccc1ca839
8 changed files with 27 additions and 3 deletions
--- a/.github/ci_commit_pins/torchbench.txt
+++ b/.github/ci_commit_pins/torchbench.txt
@ -1 +1 @@
-1ef0a39e13872e4cf810c430ae4c87e46938f2ba
+d6015d42d9a1834bc7595c4bd6852562fb80b30b
--- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
@ -310,6 +310,10 @@ sam,pass,0



+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0


--- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv
@ -282,6 +282,10 @@ sam,fail_to_run,0



+sam_fast,fail_to_run,0
+
+
+
 shufflenet_v2_x1_0,pass,0


--- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
@ -310,6 +310,10 @@ sam,pass,0



+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0


--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
@ -310,6 +310,10 @@ sam,pass,0



+sam_fast,pass,0
+
+
+
 shufflenet_v2_x1_0,pass,0


--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@ -3483,6 +3483,7 @@ def run(runner, args, original_dir=None):
            "Wav2Vec2ForCTC",
            "Wav2Vec2ForPreTraining",
            "sam",
+            "sam_fast",
            "resnet50_quantized_qat",
            "mobilenet_v2_quantized_qat",
        }:
--- a/benchmarks/dynamo/torchbench.py
+++ b/benchmarks/dynamo/torchbench.py
@ -229,6 +229,11 @@ class TorchBenchmarkRunner(BenchmarkRunner):
        if part:
            extra_args += ["--part", part]

+        # sam_fast only runs with amp
+        if model_name == "sam_fast":
+            self.args.amp = True
+            self.setup_amp()
+
        if model_name == "vision_maskrcnn" and is_training:
            # Output of vision_maskrcnn model is a list of bounding boxes,
            # sorted on the basis of their scores. This makes accuracy
@ -259,7 +264,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
                extra_args=extra_args,
            )
        model, example_inputs = benchmark.get_module()
-
        # Models that must be in train mode while training
        if is_training and (
            not use_eval_mode or model_name in self._config["only_training"]
@ -269,7 +273,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
            model.eval()
        gc.collect()
        batch_size = benchmark.batch_size
-
        # Torchbench has quite different setup for yolov3, so directly passing
        # the right example_inputs
        if model_name == "yolov3":
--- a/benchmarks/dynamo/torchbench.yaml
+++ b/benchmarks/dynamo/torchbench.yaml
@ -94,6 +94,7 @@ slow:
 non_deterministic:
  # https://github.com/pytorch/pytorch/issues/98355
  - mobilenet_v3_large
+  - sam_fast


 dtype:
@ -173,6 +174,8 @@ skip:
      # timeout
      - sam
      # model is CUDA only
+      - sam_fast
+      # model is CUDA only
      - llama_v2_7b_16h
      # flaky
      - stable_diffusion
@ -197,6 +200,7 @@ skip:
      - llama
      - llama_v2_7b_16h
      - simple_gpt
+      - sam_fast
      # Model's DEFAULT_TRAIN_BSIZE is not implemented
      - cm3leon_generate
      - hf_T5_generate