Record PR time benchmark results in JSON format (#140493)

I'm trying to make these benchmark results available in the OSS benchmark database, so that people can query them from outside. The first step is to also record the results in a JSON format compatible with the database schema defined in https://github.com/pytorch/test-infra/pull/5839.

Existing CSV files remain unchanged.
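For illustration, a single record in the new format would look roughly like this (field values borrowed from the `add_loop` eager benchmark below; the metric value is a made-up placeholder):

```json
[
  {
    "benchmark": {
      "name": "pr_time_benchmarks",
      "mode": "",
      "extra_info": {
        "is_dynamic": false,
        "device": "cpu",
        "description": "a loop over 100 add node"
      }
    },
    "model": {
      "name": "add_loop_eager",
      "type": "add_loop",
      "backend": "eager"
    },
    "metric": {
      "name": "compile_time_instruction_count",
      "benchmark_values": [1234567890]
    }
  }
]
```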

### Testing

The JSON results are uploaded to S3 as artifacts (see https://github.com/pytorch/pytorch/actions/runs/11809725848/job/32901411180#step:26:13), for example https://gha-artifacts.s3.amazonaws.com/pytorch/pytorch/11809725848/1/artifact/test-jsons-test-pr_time_benchmarks-1-1-linux.g4dn.metal.nvidia.gpu_32901411180.zip

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140493
Approved by: https://github.com/laithsakka
Huy Do 2024-11-20 18:54:01 +00:00 committed by PyTorch MergeBot
parent 4acd56eb53
commit 1a7055cb73
9 changed files with 150 additions and 31 deletions


@@ -330,6 +330,14 @@ jobs:
           test_config: ${{ matrix.config }}
           job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

+      - name: Upload the benchmark results
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: test/test-reports
+          dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Print remaining test logs
         shell: bash
         if: always() && steps.test.conclusion


@@ -1,5 +1,7 @@
 import csv
 import gc
+import json
+import os
 from abc import ABC, abstractmethod

 from fbscribelogger import make_scribe_logger
@@ -65,6 +67,22 @@ class BenchmarkBase(ABC):
     # number of iterations used to run when collecting instruction_count or compile_time_instruction_count.
     _num_iterations = 5

+    def __init__(
+        self,
+        category: str,
+        device: str,
+        backend: str = "",
+        mode: str = "",
+        dynamic=None,
+    ):
+        # These individual attributes are used to support different filters on the
+        # dashboard later
+        self._category = category
+        self._device = device
+        self._backend = backend
+        self._mode = mode  # Training or inference
+        self._dynamic = dynamic
+
     def with_iterations(self, value):
         self._num_iterations = value
         return self
@@ -80,6 +98,21 @@ class BenchmarkBase(ABC):
     def name(self):
         return ""

+    def backend(self):
+        return self._backend
+
+    def mode(self):
+        return self._mode
+
+    def category(self):
+        return self._category
+
+    def device(self):
+        return self._device
+
+    def is_dynamic(self):
+        return self._dynamic
+
     def description(self):
         return ""
@@ -134,6 +167,46 @@ class BenchmarkBase(ABC):
         finally:
             gc.enable()

+    def _write_to_json(self, output_dir: str):
+        """
+        Write the result into JSON format, so that it can be uploaded to the benchmark database
+        to be displayed on OSS dashboard. The JSON format is defined at
+        https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        """
+        records = []
+        for entry in self.results:
+            metric_name = entry[1]
+            value = entry[2]
+
+            if not metric_name or value is None:
+                continue
+
+            records.append(
+                {
+                    "benchmark": {
+                        "name": "pr_time_benchmarks",
+                        "mode": self.mode(),
+                        "extra_info": {
+                            "is_dynamic": self.is_dynamic(),
+                            "device": self.device(),
+                            "description": self.description(),
+                        },
+                    },
+                    "model": {
+                        "name": self.name(),
+                        "type": self.category(),
+                        "backend": self.backend(),
+                    },
+                    "metric": {
+                        "name": metric_name,
+                        "benchmark_values": [value],
+                    },
+                }
+            )
+
+        with open(os.path.join(output_dir, f"{self.name()}.json"), "w") as f:
+            json.dump(records, f)
+
     def append_results(self, path):
         with open(path, "a", newline="") as csvfile:
             # Create a writer object

@@ -142,6 +215,10 @@ class BenchmarkBase(ABC):
             for entry in self.results:
                 writer.writerow(entry)

+        # TODO (huydhn) This requires the path to write to, so it needs to be in the same place
+        # as the CSV writer for now
+        self._write_to_json(os.path.dirname(os.path.abspath(path)))
+
     def print(self):
         for entry in self.results:
             print(f"{entry[0]},{entry[1]},{entry[2]}")


@@ -8,15 +8,18 @@ from torch._inductor.utils import fresh_inductor_cache

 class Benchmark(BenchmarkBase):
     def __init__(self, backend, dynamic=False, is_gpu=False):
-        self._backend = backend
-        self._dynamic = dynamic
-        self._device = "cuda" if is_gpu else "cpu"
+        super().__init__(
+            category="add_loop",
+            backend=backend,
+            device="cuda" if is_gpu else "cpu",
+            dynamic=dynamic,
+        )

     def name(self):
-        prefix = f"add_loop_{self._backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
-        if self._device == "cuda":
+        if self.device() == "cuda":
             prefix += "_gpu"
         return prefix
@@ -24,14 +27,18 @@ class Benchmark(BenchmarkBase):
         return "a loop over 100 add node"

     def _prepare_once(self):
-        self.a = torch.ones(1000, device=self._device)
-        self.b = torch.torch.ones(1000, device=self._device)
+        self.a = torch.ones(1000, device=self.device())
+        self.b = torch.torch.ones(1000, device=self.device())

     def _prepare(self):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend=self._backend, fullgraph=True, dynamic=self._dynamic)
+        @torch.compile(
+            backend=self.backend(),
+            fullgraph=True,
+            dynamic=self.is_dynamic(),
+        )
         def f(a, b):
             result = a.clone()
             for i in range(1000):

@@ -10,19 +10,20 @@ class Benchmark(BenchmarkBase):
     def __init__(self, *, training, subclass):
         self._training = training
         self._subclass = subclass
-        self._device = "cpu"
+        super().__init__(
+            category="aotdispatcher",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+            mode="training" if self._training else "inference",
+        )

     def name(self):
-        prefix = "aotdispatcher"
-        if self._training:
-            prefix += "_training"
-        else:
-            prefix += "_inference"
+        prefix = f"{self.category()}_{self.mode()}"
         if self._subclass:
             prefix += "_subclass"
         else:
             prefix += "_nosubclass"
-        if self._device == "cpu":
+        if self.device() == "cpu":
             prefix += "_cpu"
         return prefix
@@ -31,7 +32,7 @@ class Benchmark(BenchmarkBase):

     def _prepare_once(self):
         _args = [
-            torch.ones(100, requires_grad=self._training, device=self._device)
+            torch.ones(100, requires_grad=self._training, device=self.device())
             for _ in range(100)
         ]
         if self._subclass:
@@ -45,7 +46,7 @@ class Benchmark(BenchmarkBase):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(*args):
             outs = [torch.add(x, x) for x in args]
             return outs


@@ -6,8 +6,15 @@ import torch

 class Benchmark(BenchmarkBase):
+    def __init__(self):
+        super().__init__(
+            category="aotdispatcher_partitioner",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+        )
+
     def name(self):
-        return "aotdispatcher_partitioner_cpu"
+        return f"{self.category()}_{self.device()}"

     def description(self):
         return "partitioner benchmark 1 input and 100 weights, mix of recompute and non-recompute ops"
@@ -20,7 +27,7 @@ class Benchmark(BenchmarkBase):
         torch._dynamo.reset()

     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(inp, *weights):
             x = inp
             for w in weights:


@@ -24,15 +24,20 @@ class Benchmark(BenchmarkBase):
         self, ModuleClass, backend, is_gpu=False, dynamic=False, force_shape_pad=False
     ):
         self.ModuleClass = ModuleClass
-        self.backend = backend
         self._name = ModuleClass.__name__
         self._is_gpu = is_gpu
-        self._dynamic = dynamic
         self._force_shape_pad = force_shape_pad

+        super().__init__(
+            category="basic_modules",
+            backend=backend,
+            device="cuda" if self._is_gpu else "cpu",
+            dynamic=dynamic,
+        )
+
     def name(self):
-        prefix = f"basic_modules_{self._name}_{self.backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self._name}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
         if self._is_gpu:
             prefix += "_gpu"
@@ -43,7 +48,7 @@ class Benchmark(BenchmarkBase):

     def _prepare_once(self):
         self.m = self.ModuleClass()
         torch.set_float32_matmul_precision("high")
-        self.input = torch.ones(10, device="cuda" if self._is_gpu else "cpu")
+        self.input = torch.ones(10, device=self.device())

     def _prepare(self):
         torch._dynamo.reset()
@@ -52,7 +57,7 @@ class Benchmark(BenchmarkBase):
         with fresh_inductor_cache(), torch._inductor.config.patch(
             force_shape_pad=self._force_shape_pad
         ):
-            opt_m = torch.compile(backend=self.backend, dynamic=self._dynamic)(
+            opt_m = torch.compile(backend=self.backend(), dynamic=self.is_dynamic())(
                 self.m.cuda() if self._is_gpu else self.m
             )
             opt_m(self.input)


@@ -8,8 +8,11 @@ import torch

 class Benchmark(BenchmarkBase):
     N = 100

+    def __init__(self):
+        super().__init__(category="sum_floordiv", backend="export", device="cpu")
+
     def name(self):
-        return "sum_floordiv_regression"
+        return f"{self.category()}_regression"

     def description(self):
         return "information at https://github.com/pytorch/pytorch/issues/134133"


@@ -9,14 +9,18 @@ class Benchmark(BenchmarkBase):
     N = 200

     def __init__(self, use_loop=False):
-        super().__init__()
         self.use_loop = use_loop
+        super().__init__(
+            category="symint_sum",
+            backend="inductor",
+            device="cpu",
+        )

     def name(self):
         if self.use_loop:
-            return "symint_sum_loop"
+            return f"{self.category()}_loop"
-        return "symint_sum"
+        return self.category()

     def description(self):
         return "see https://docs.google.com/document/d/11xJXl1etSmefUxPiVyk885e0Dl-4o7QwxYcPiMIo2iY/edit"


@@ -8,8 +8,15 @@ import torch

 class Benchmark(BenchmarkBase):
     N = 20

+    def __init__(self):
+        super().__init__(
+            category="update_hint",
+            backend="inductor",
+            device="cpu",
+        )
+
     def name(self):
-        return "update_hint_regression"
+        return f"{self.category()}_regression"

     def description(self):
         return "information at https://github.com/pytorch/pytorch/pull/129893"