mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
The benchmark is failing with the following error:
```
File "/var/lib/jenkins/workspace/benchmarks/gpt_fast/benchmark.py", line 333, in <module>
main(output_file=args.output, only_model=args.only)
File "/var/lib/jenkins/workspace/benchmarks/gpt_fast/benchmark.py", line 308, in main
lst = func(device)
File "/var/lib/jenkins/workspace/benchmarks/gpt_fast/benchmark.py", line 66, in run_mlp_layer_norm_gelu
us_per_iter = benchmarker.benchmark(compiled_mod, (x,)) * 1000
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/_inductor/runtime/benchmarking.py", line 39, in wrapper
return fn(self, *args, **kwargs)
TypeError: benchmark() missing 1 required positional argument: 'fn_kwargs'
```
An example error is https://github.com/pytorch/pytorch/actions/runs/12862761823/job/35858912555
I am also assigning `oncall: pt2` as the owner of this job going forward.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/145235
Approved by: https://github.com/nmacchioni
This commit is contained in:
parent
2cffbff7da
commit
eb553ae3cf
3 changed files with 6 additions and 4 deletions
|
|
@ -26,7 +26,7 @@ jobs:
|
|||
# Use metal host for benchmark jobs
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor-micro-benchmark-cpu-x86", shard: 1, num_shards: 1, runner: "linux.24xl.spr-metal" },
|
||||
{ config: "inductor-micro-benchmark-cpu-x86", shard: 1, num_shards: 1, runner: "linux.24xl.spr-metal", owners: ["oncall:pt2"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ jobs:
|
|||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor-micro-benchmark", shard: 1, num_shards: 1, runner: "linux.aws.a100" },
|
||||
{ config: "inductor-micro-benchmark", shard: 1, num_shards: 1, runner: "linux.aws.a100", owners: ["oncall:pt2"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ def run_mlp_layer_norm_gelu(device: str = "cuda"):
|
|||
for _ in range(WARMUP_ITER):
|
||||
compiled_mod(x)
|
||||
|
||||
us_per_iter = benchmarker.benchmark(compiled_mod, (x,)) * 1000
|
||||
us_per_iter = benchmarker.benchmark(compiled_mod, (x,), {}) * 1000
|
||||
flops_utilization += us_per_iter * flops / 1e9 / A100_40G_BF16_TFLOPS
|
||||
|
||||
flops_utilization = flops_utilization / len(input_shapes)
|
||||
|
|
@ -102,7 +102,7 @@ def run_layer_norm(device: str = "cuda"):
|
|||
for _ in range(WARMUP_ITER):
|
||||
compiled_mod(x)
|
||||
|
||||
us_per_iter = benchmarker.benchmark(compiled_mod, (x,)) * 1000
|
||||
us_per_iter = benchmarker.benchmark(compiled_mod, (x,), {}) * 1000
|
||||
memory_bandwidth += (1e6 / us_per_iter) * 2 * BS * D * dtype.itemsize / 1e9
|
||||
|
||||
memory_bandwidth = memory_bandwidth / len(input_shapes)
|
||||
|
|
@ -155,6 +155,7 @@ def run_gather_gemv(device: str = "cuda"):
|
|||
score_idxs,
|
||||
x,
|
||||
),
|
||||
{},
|
||||
)
|
||||
* 1000
|
||||
)
|
||||
|
|
@ -207,6 +208,7 @@ def run_gemv(device: str = "cuda"):
|
|||
W,
|
||||
x,
|
||||
),
|
||||
{},
|
||||
)
|
||||
* 1000
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in a new issue