Record PR time benchmark results in JSON format (#140493)

I'm trying to make these benchmark results available in the OSS benchmark database, so that people can query them from outside. The first step is to also record the results in a JSON format compatible with the database schema defined in https://github.com/pytorch/test-infra/pull/5839.

Existing CSV files remain unchanged.
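For illustration, a single record in the new format would look roughly like this (field values borrowed from the `add_loop` eager benchmark below; the metric value is a made-up placeholder):

```json
[
  {
    "benchmark": {
      "name": "pr_time_benchmarks",
      "mode": "",
      "extra_info": {
        "is_dynamic": false,
        "device": "cpu",
        "description": "a loop over 100 add node"
      }
    },
    "model": {
      "name": "add_loop_eager",
      "type": "add_loop",
      "backend": "eager"
    },
    "metric": {
      "name": "compile_time_instruction_count",
      "benchmark_values": [1234567890]
    }
  }
]
```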

### Testing

The JSON results are uploaded to S3 as artifacts (see https://github.com/pytorch/pytorch/actions/runs/11809725848/job/32901411180#step:26:13), for example https://gha-artifacts.s3.amazonaws.com/pytorch/pytorch/11809725848/1/artifact/test-jsons-test-pr_time_benchmarks-1-1-linux.g4dn.metal.nvidia.gpu_32901411180.zip

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140493
Approved by: https://github.com/laithsakka
Huy Do 2024-11-20 18:54:01 +00:00 committed by PyTorch MergeBot
parent 4acd56eb53
commit 1a7055cb73
9 changed files with 150 additions and 31 deletions


@@ -330,6 +330,14 @@ jobs:
           test_config: ${{ matrix.config }}
           job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

+      - name: Upload the benchmark results
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: test/test-reports
+          dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Print remaining test logs
         shell: bash
         if: always() && steps.test.conclusion


@@ -1,5 +1,7 @@
 import csv
 import gc
+import json
+import os
 from abc import ABC, abstractmethod

 from fbscribelogger import make_scribe_logger
@@ -65,6 +67,22 @@ class BenchmarkBase(ABC):
     # number of iterations used to run when collecting instruction_count or compile_time_instruction_count.
     _num_iterations = 5

+    def __init__(
+        self,
+        category: str,
+        device: str,
+        backend: str = "",
+        mode: str = "",
+        dynamic=None,
+    ):
+        # These individual attributes are used to support different filters on the
+        # dashboard later
+        self._category = category
+        self._device = device
+        self._backend = backend
+        self._mode = mode  # Training or inference
+        self._dynamic = dynamic
+
     def with_iterations(self, value):
         self._num_iterations = value
         return self
@@ -80,6 +98,21 @@ class BenchmarkBase(ABC):
     def name(self):
         return ""

+    def backend(self):
+        return self._backend
+
+    def mode(self):
+        return self._mode
+
+    def category(self):
+        return self._category
+
+    def device(self):
+        return self._device
+
+    def is_dynamic(self):
+        return self._dynamic
+
     def description(self):
         return ""
@@ -134,6 +167,46 @@ class BenchmarkBase(ABC):
         finally:
             gc.enable()

+    def _write_to_json(self, output_dir: str):
+        """
+        Write the result into JSON format, so that it can be uploaded to the benchmark database
+        to be displayed on OSS dashboard. The JSON format is defined at
+        https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        """
+        records = []
+        for entry in self.results:
+            metric_name = entry[1]
+            value = entry[2]
+
+            if not metric_name or value is None:
+                continue
+
+            records.append(
+                {
+                    "benchmark": {
+                        "name": "pr_time_benchmarks",
+                        "mode": self.mode(),
+                        "extra_info": {
+                            "is_dynamic": self.is_dynamic(),
+                            "device": self.device(),
+                            "description": self.description(),
+                        },
+                    },
+                    "model": {
+                        "name": self.name(),
+                        "type": self.category(),
+                        "backend": self.backend(),
+                    },
+                    "metric": {
+                        "name": metric_name,
+                        "benchmark_values": [value],
+                    },
+                }
+            )
+
+        with open(os.path.join(output_dir, f"{self.name()}.json"), "w") as f:
+            json.dump(records, f)
+
     def append_results(self, path):
         with open(path, "a", newline="") as csvfile:
             # Create a writer object

@@ -142,6 +215,10 @@ class BenchmarkBase(ABC):
             for entry in self.results:
                 writer.writerow(entry)

+        # TODO (huydhn) This requires the path to write to, so it needs to be in the same place
+        # as the CSV writer for now
+        self._write_to_json(os.path.dirname(os.path.abspath(path)))
+
     def print(self):
         for entry in self.results:
             print(f"{entry[0]},{entry[1]},{entry[2]}")


@@ -8,15 +8,18 @@ from torch._inductor.utils import fresh_inductor_cache

 class Benchmark(BenchmarkBase):
     def __init__(self, backend, dynamic=False, is_gpu=False):
-        self._backend = backend
-        self._dynamic = dynamic
-        self._device = "cuda" if is_gpu else "cpu"
+        super().__init__(
+            category="add_loop",
+            backend=backend,
+            device="cuda" if is_gpu else "cpu",
+            dynamic=dynamic,
+        )

     def name(self):
-        prefix = f"add_loop_{self._backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
-        if self._device == "cuda":
+        if self.device() == "cuda":
             prefix += "_gpu"
         return prefix
@@ -24,14 +27,18 @@ class Benchmark(BenchmarkBase):
         return "a loop over 100 add node"

     def _prepare_once(self):
-        self.a = torch.ones(1000, device=self._device)
-        self.b = torch.torch.ones(1000, device=self._device)
+        self.a = torch.ones(1000, device=self.device())
+        self.b = torch.torch.ones(1000, device=self.device())

     def _prepare(self):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend=self._backend, fullgraph=True, dynamic=self._dynamic)
+        @torch.compile(
+            backend=self.backend(),
+            fullgraph=True,
+            dynamic=self.is_dynamic(),
+        )
         def f(a, b):
             result = a.clone()
             for i in range(1000):

@@ -10,19 +10,20 @@ class Benchmark(BenchmarkBase):
     def __init__(self, *, training, subclass):
         self._training = training
         self._subclass = subclass
-        self._device = "cpu"
+        super().__init__(
+            category="aotdispatcher",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+            mode="training" if self._training else "inference",
+        )

     def name(self):
-        prefix = "aotdispatcher"
-        if self._training:
-            prefix += "_training"
-        else:
-            prefix += "_inference"
+        prefix = f"{self.category()}_{self.mode()}"
         if self._subclass:
             prefix += "_subclass"
         else:
             prefix += "_nosubclass"
-        if self._device == "cpu":
+        if self.device() == "cpu":
             prefix += "_cpu"
         return prefix
@@ -31,7 +32,7 @@ class Benchmark(BenchmarkBase):

     def _prepare_once(self):
         _args = [
-            torch.ones(100, requires_grad=self._training, device=self._device)
+            torch.ones(100, requires_grad=self._training, device=self.device())
             for _ in range(100)
         ]
         if self._subclass:
@@ -45,7 +46,7 @@ class Benchmark(BenchmarkBase):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(*args):
             outs = [torch.add(x, x) for x in args]
             return outs


@@ -6,8 +6,15 @@ import torch

 class Benchmark(BenchmarkBase):
+    def __init__(self):
+        super().__init__(
+            category="aotdispatcher_partitioner",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+        )
+
     def name(self):
-        return "aotdispatcher_partitioner_cpu"
+        return f"{self.category()}_{self.device()}"

     def description(self):
         return "partitioner benchmark 1 input and 100 weights, mix of recompute and non-recompute ops"
@@ -20,7 +27,7 @@ class Benchmark(BenchmarkBase):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(inp, *weights):
             x = inp
             for w in weights:


@@ -24,15 +24,20 @@ class Benchmark(BenchmarkBase):
         self, ModuleClass, backend, is_gpu=False, dynamic=False, force_shape_pad=False
     ):
         self.ModuleClass = ModuleClass
-        self.backend = backend
         self._name = ModuleClass.__name__
         self._is_gpu = is_gpu
-        self._dynamic = dynamic
         self._force_shape_pad = force_shape_pad

+        super().__init__(
+            category="basic_modules",
+            backend=backend,
+            device="cuda" if self._is_gpu else "cpu",
+            dynamic=dynamic,
+        )
+
     def name(self):
-        prefix = f"basic_modules_{self._name}_{self.backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self._name}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
         if self._is_gpu:
             prefix += "_gpu"
@@ -43,7 +48,7 @@ class Benchmark(BenchmarkBase):

     def _prepare_once(self):
         self.m = self.ModuleClass()
         torch.set_float32_matmul_precision("high")
-        self.input = torch.ones(10, device="cuda" if self._is_gpu else "cpu")
+        self.input = torch.ones(10, device=self.device())

     def _prepare(self):
         torch._dynamo.reset()
@@ -52,7 +57,7 @@ class Benchmark(BenchmarkBase):
         with fresh_inductor_cache(), torch._inductor.config.patch(
             force_shape_pad=self._force_shape_pad
         ):
-            opt_m = torch.compile(backend=self.backend, dynamic=self._dynamic)(
+            opt_m = torch.compile(backend=self.backend(), dynamic=self.is_dynamic())(
                 self.m.cuda() if self._is_gpu else self.m
             )
             opt_m(self.input)


@@ -8,8 +8,11 @@ import torch

 class Benchmark(BenchmarkBase):
     N = 100

+    def __init__(self):
+        super().__init__(category="sum_floordiv", backend="export", device="cpu")
+
     def name(self):
-        return "sum_floordiv_regression"
+        return f"{self.category()}_regression"

     def description(self):
         return "information at https://github.com/pytorch/pytorch/issues/134133"


@@ -9,14 +9,18 @@ class Benchmark(BenchmarkBase):
     N = 200

     def __init__(self, use_loop=False):
-        super().__init__()
         self.use_loop = use_loop
+        super().__init__(
+            category="symint_sum",
+            backend="inductor",
+            device="cpu",
+        )

     def name(self):
         if self.use_loop:
-            return "symint_sum_loop"
+            return f"{self.category()}_loop"
-        return "symint_sum"
+        return self.category()

     def description(self):
         return "see https://docs.google.com/document/d/11xJXl1etSmefUxPiVyk885e0Dl-4o7QwxYcPiMIo2iY/edit"


@@ -8,8 +8,15 @@ import torch

 class Benchmark(BenchmarkBase):
     N = 20

+    def __init__(self):
+        super().__init__(
+            category="update_hint",
+            backend="inductor",
+            device="cpu",
+        )
+
     def name(self):
-        return "update_hint_regression"
+        return f"{self.category()}_regression"

     def description(self):
         return "information at https://github.com/pytorch/pytorch/pull/129893"