Update torchbench commit pin, add sam_fast benchmark (#121420)

With this change, the sam_fast benchmark can be run from the pytorch repo:
```
SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 benchmarks/dynamo/torchbench.py --inference --amp --performance --backend=inductor --explain --only sam_fast
```

sam_fast is designed for inference only, with CUDA and AMP enabled. This change adds those restrictions to the benchmark.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121420
Approved by: https://github.com/oulgen, https://github.com/msaroufim
This commit is contained in:
James Wu 2024-03-10 20:53:32 -07:00 committed by PyTorch MergeBot
parent dccc1ca839
commit ae22bdaefe
8 changed files with 27 additions and 3 deletions

View file

@ -1 +1 @@
1ef0a39e13872e4cf810c430ae4c87e46938f2ba
d6015d42d9a1834bc7595c4bd6852562fb80b30b

View file

@ -310,6 +310,10 @@ sam,pass,0
sam_fast,pass,0
shufflenet_v2_x1_0,pass,0

1 name accuracy graph_breaks
310
311
312
313
314
315
316
317
318
319

View file

@ -282,6 +282,10 @@ sam,fail_to_run,0
sam_fast,fail_to_run,0
shufflenet_v2_x1_0,pass,0

1 name accuracy graph_breaks
282
283
284
285
286
287
288
289
290
291

View file

@ -310,6 +310,10 @@ sam,pass,0
sam_fast,pass,0
shufflenet_v2_x1_0,pass,0

1 name accuracy graph_breaks
310
311
312
313
314
315
316
317
318
319

View file

@ -310,6 +310,10 @@ sam,pass,0
sam_fast,pass,0
shufflenet_v2_x1_0,pass,0

1 name accuracy graph_breaks
310
311
312
313
314
315
316
317
318
319

View file

@ -3483,6 +3483,7 @@ def run(runner, args, original_dir=None):
"Wav2Vec2ForCTC",
"Wav2Vec2ForPreTraining",
"sam",
"sam_fast",
"resnet50_quantized_qat",
"mobilenet_v2_quantized_qat",
}:

View file

@ -229,6 +229,11 @@ class TorchBenchmarkRunner(BenchmarkRunner):
if part:
extra_args += ["--part", part]
# sam_fast only runs with amp
if model_name == "sam_fast":
self.args.amp = True
self.setup_amp()
if model_name == "vision_maskrcnn" and is_training:
# Output of vision_maskrcnn model is a list of bounding boxes,
# sorted on the basis of their scores. This makes accuracy
@ -259,7 +264,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
extra_args=extra_args,
)
model, example_inputs = benchmark.get_module()
# Models that must be in train mode while training
if is_training and (
not use_eval_mode or model_name in self._config["only_training"]
@ -269,7 +273,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
model.eval()
gc.collect()
batch_size = benchmark.batch_size
# Torchbench has quite different setup for yolov3, so directly passing
# the right example_inputs
if model_name == "yolov3":

View file

@ -94,6 +94,7 @@ slow:
non_deterministic:
# https://github.com/pytorch/pytorch/issues/98355
- mobilenet_v3_large
- sam_fast
dtype:
@ -173,6 +174,8 @@ skip:
# timeout
- sam
# model is CUDA only
- sam_fast
# model is CUDA only
- llama_v2_7b_16h
# flaky
- stable_diffusion
@ -197,6 +200,7 @@ skip:
- llama
- llama_v2_7b_16h
- simple_gpt
- sam_fast
# Model's DEFAULT_TRAIN_BSIZE is not implemented
- cm3leon_generate
- hf_T5_generate