Upload MPS benchmark results (#141087)

This uploads the MPS benchmark results to the benchmark database. The data can then be queried, for example:

```
select
    benchmark,
    model,
    metric
from
    oss_ci_benchmark_v3
where
    head_sha = '99a133116fee15aa1467165f2b209b37da53f189'
    and metric.name in ['eager_peak_mem', 'dynamo_peak_mem', 'speedup']
    and model.name = 'BERT_pytorch'
```

I'm documenting the JSON format at https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
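For quick reference, the new `output_json` helper in the diff below emits one JSON object per metric, one record per line, alongside the existing CSV output. A minimal sketch of the record shape it produces (the values here are illustrative placeholders, not results from a real run):

```
import json

# Illustrative record in the shape produced by output_json below;
# every value is a placeholder, not a measured result.
record = {
    "benchmark": {
        "name": "TorchInductor",
        "mode": "training",  # "training" or "inference", from the CLI flags
        "dtype": "amp",      # float16 / bfloat16 / float32 / amp, or ""
        "extra_info": {
            # In the real output, extra_info also carries the full set of
            # run settings (vars(args)).
            "device": "mps",
            "quantization": None,
            "batch_size": 4,
        },
    },
    "model": {
        "name": "BERT_pytorch",
        "type": "OSS model",
        "backend": "eager",
        "origins": ["torchbench"],
    },
    "metric": {
        "name": "speedup",
        "benchmark_values": [1.0],
    },
}

# One such record is appended per line to "<output>.json".
print(json.dumps(record))
```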

### Testing

Locally,

```
PYTHONPATH=/Users/huydo/Storage/mine/benchmark python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output test/test-reports/torchbench_training.csv
```
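With this change, the same run also appends one JSON record per metric to a file next to the CSV, i.e. `test/test-reports/torchbench_training.json` for the command above (the path mirrors the `--output` argument with a `.json` extension). A minimal sketch for sanity-checking the output of that run:

```
import json

# The benchmark script writes JSON Lines: one self-contained record per line.
with open("test/test-reports/torchbench_training.json") as f:
    for line in f:
        record = json.loads(line)
        print(
            record["model"]["name"],
            record["metric"]["name"],
            record["metric"]["benchmark_values"],
        )
```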

Workflow dispatch run: https://github.com/pytorch/pytorch/actions/runs/11927990520

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141087
Approved by: https://github.com/malfet
Authored by Huy Do on 2024-11-20 18:18:21 +00:00; committed by PyTorch MergeBot
parent 1d8318df98
commit 4acd56eb53
2 changed files with 115 additions and 21 deletions


@@ -223,6 +223,14 @@ jobs:
          use-gha: true
          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
      - name: Upload the benchmark results
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: test/test-reports
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Clean up disk space
        if: always()
        continue-on-error: true


@@ -111,6 +111,11 @@ os.environ["KINETO_LOG_LEVEL"] = "5"
current_name = ""
current_device = ""
current_backend = ""
current_mode = ""
current_dtype = ""
current_quantization = ""
current_settings = None
current_onnx_compiler = ""
current_batch_size = None
output_filename = None
@@ -356,10 +361,19 @@ def load_model_from_path(path_and_class_str):
    return model, inputs
def output_csv(filename, headers, row):
def write_outputs(filename, headers, row):
    """
    Write both CSV and JSON outputs using the original CSV output interface
    """
    global disable_output
    if disable_output:
        return
    output_csv(filename, headers, row)
    output_json(filename, headers, row)
def output_csv(filename, headers, row):
    if os.path.exists(filename):
        with open(filename) as fd:
            lines = list(csv.reader(fd)) or [[]]
@@ -377,6 +391,56 @@ def output_csv(filename, headers, row):
            writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
def output_json(filename, headers, row):
    """
    Write the result into JSON format, so that it can be uploaded to the benchmark database
    to be displayed on the OSS dashboard. The JSON format is defined at
    https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
    """
    origin = ""
    if "torchbench" in filename:
        origin = "torchbench"
    elif "huggingface" in filename:
        origin = "huggingface"
    elif "timm_models" in filename:
        origin = "timm_models"
    extra_info = {
        "device": current_device,
        "quantization": current_quantization,
        "batch_size": current_batch_size,
    }
    if current_settings:
        extra_info.update(current_settings)
    mapping_headers = {headers[i]: v for i, v in enumerate(row)}
    with open(f"{os.path.splitext(filename)[0]}.json", "a") as f:
        for header, value in mapping_headers.items():
            # These headers are not metric names
            if header in ("dev", "name", "batch_size"):
                continue
            record = {
                "benchmark": {
                    "name": "TorchInductor",
                    "mode": current_mode,
                    "dtype": current_dtype,
                    "extra_info": extra_info,
                },
                "model": {
                    "name": current_name,
                    "type": "OSS model",
                    "backend": current_backend,
                    "origins": [origin],
                },
                "metric": {
                    "name": header,
                    "benchmark_values": [value],
                },
            }
            print(json.dumps(record), file=f)
def get_suite_from_model_iter_fn(model_iter_fn):
    # TODO: This is a bit of a hack
    suite = None
@@ -729,7 +793,7 @@ def coverage_experiment(args, model_iter_fn, model, example_inputs):
with profiler.prof:
frozen_model_iter_fn(model, example_inputs)
coverage_result = profiler.results()
output_csv(
write_outputs(
output_filename,
(
"dev",
@@ -768,7 +832,7 @@ def recompile_profiler_experiment(args, model_iter_fn, model, example_inputs):
model_iter_fn
)
opt_model_iter_fn(model, example_inputs)
output_csv(
write_outputs(
output_filename, ["model", "profiler report"], [current_name, prof.report()]
)
met = prof.get_metrics()
@@ -923,7 +987,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs):
for k, v in kwargs["dynamo_stats"].items():
headers.append(k)
row.append(v)
output_csv(
write_outputs(
output_filename,
headers,
row,
@@ -932,7 +996,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs):
assert (
output_filename.find(".csv") > 0
), f"expected output_filename to be a .csv, but got {output_filename}"
output_csv(
write_outputs(
output_filename[:-4] + "_compilation_metrics.csv",
first_headers + c_headers,
first_fields + c_data,
@@ -1092,7 +1156,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
for k, v in kwargs["dynamo_stats"].items():
headers.append(k)
row.append(v)
output_csv(
write_outputs(
output_filename,
headers,
row,
@@ -1101,7 +1165,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
assert (
output_filename.find(".csv") > 0
), f"expected output_filename to be a .csv, but got {output_filename}"
output_csv(
write_outputs(
output_filename[:-4] + "_compilation_metrics.csv",
first_headers + c_headers,
first_fields + c_data,
@@ -1177,7 +1241,7 @@ def speedup_experiment_ds(args, model_iter_fn, model, example_inputs):
]
)
)
output_csv(
write_outputs(
output_filename,
("dev", "name", "batch_size", "speedup mean", "speedup median", "speedup var"),
[
@@ -1339,7 +1403,7 @@ def speedup_experiment_onnx(
row.append(kwargs["compilation_latency"])
row.append(kwargs["compression_ratio"])
output_csv(
write_outputs(
output_filename,
headers,
row,
@@ -1348,7 +1412,7 @@ def speedup_experiment_onnx(
assert (
output_filename.find(".csv") > 0
), f"expected output_filename to be a .csv, but got {output_filename}"
output_csv(
write_outputs(
output_filename[:-4] + "_compilation_metrics.csv",
["dev", "name", "batch_size"] + headers,
[current_device, current_name, current_batch_size] + data,
@@ -1422,7 +1486,7 @@ def baselines(models, model_iter_fn, example_inputs, args):
for s, p, m in zip(speedup, pvalue, [m for n, m in models[1:]])
]
)
output_csv(
write_outputs(
output_filename,
("dev", "name", "batch_size") + tuple(n for n, m in models[1:]),
[current_device, current_name, current_batch_size]
@@ -1449,7 +1513,7 @@ def xla(args, model_iter_fn, model, example_inputs):
pvalue = ttest_ind(timings[:, 0], timings[:, 1]).pvalue
time_baseline, time_xla = np.median(timings, axis=0)
speedup = time_baseline / time_xla
output_csv(
write_outputs(
output_filename,
("dev", "name", "batch_size", "speedup", "time_baseline", "time_xla"),
[
@@ -2245,7 +2309,7 @@ def optimize_onnx_ctx(
# `torch.onnx.dynamo_export` raises error that encloses diagnostics.
diagnostic_context = e.onnx_program.diagnostic_context
for parsed_error in parser.parse_diagnostic_context(diagnostic_context):
output_csv(
write_outputs(
output_error_filename, parsed_error.headers, parsed_error.row
)
if context.onnx_model is not None:
@@ -2261,7 +2325,7 @@ def optimize_onnx_ctx(
cause_of_exception, diagnostics.RuntimeErrorWithDiagnostic
):
parsed_error = parser.parse_exception(cause_of_exception)
output_csv(
write_outputs(
output_error_filename, parsed_error.headers, parsed_error.row
)
raise
@@ -2269,7 +2333,7 @@ def optimize_onnx_ctx(
# `torch.onnx.export` errors.
# ORT errors.
parsed_error = parser.parse_exception(e)
output_csv(output_error_filename, parsed_error.headers, parsed_error.row)
write_outputs(output_error_filename, parsed_error.headers, parsed_error.row)
raise
run_n_iterations_onnx.context = context
@@ -2836,7 +2900,7 @@ class BenchmarkRunner:
headers.append(k)
fields.append(v)
output_csv(output_filename, headers, fields)
write_outputs(output_filename, headers, fields)
output_signpost(
dict(zip(o_headers, o_fields)),
@@ -3125,7 +3189,7 @@ class BenchmarkRunner:
mean.item(),
div.item(),
]
output_csv(output_filename, headers, fields)
write_outputs(output_filename, headers, fields)
return tolerance_status
def run_performance_test_non_alternate(
@@ -3588,7 +3652,7 @@ class BenchmarkRunner:
user_stack = add_double_quotes(
", ".join([str(x) for x in graph_break.user_stack])
)
output_csv(
write_outputs(
filename,
["model", "reason", "user_stack"],
[current_name, reason, user_stack],
@@ -4271,7 +4335,7 @@ def write_csv_when_exception(args, name: str, status: str, device=None):
rows = [[device, name, placeholder_batch_size, 0.0] for device in devices]
for row in rows:
output_csv(output_filename, headers, row)
write_outputs(output_filename, headers, row)
def run(runner, args, original_dir=None):
@@ -4466,6 +4530,11 @@ def run(runner, args, original_dir=None):
current_name, \
current_device, \
current_batch_size, \
current_backend, \
current_mode, \
current_dtype, \
current_quantization, \
current_settings, \
output_filename, \
disable_output, \
optimize_ctx, \
@@ -4677,7 +4746,7 @@ def run(runner, args, original_dir=None):
for device in args.devices:
batch_size = runner.batch_size_finder(device, args.only)
print(args.only, batch_size)
output_csv(output_filename, [], [args.only, batch_size])
write_outputs(output_filename, [], [args.only, batch_size])
return
if args.export_profiler_trace:
@@ -4818,6 +4887,23 @@ def run(runner, args, original_dir=None):
current_name = name
current_device = device
current_batch_size = batch_size
current_backend = args.backend
current_mode = (
"training" if args.training else "inference" if args.inference else ""
)
if args.float16:
current_dtype = "float16"
elif args.bfloat16:
current_dtype = "bfloat16"
elif args.float32:
current_dtype = "float32"
elif args.amp:
current_dtype = "amp"
else:
current_dtype = ""
current_quantization = args.quantization
# Keep the rest of the settings
current_settings = vars(args)
set_model_name(name)
# Look for stuff that looks like batch size, and mark it dynamic.
@@ -4881,7 +4967,7 @@ def run(runner, args, original_dir=None):
)
if args.generate_aot_autograd_stats:
stats_file = output_filename.split(".csv")[0] + "_stats.csv"
output_csv(
write_outputs(
stats_file,
("dev", "name", "batch_size", "total_aot_graphs", "ok_aot_graphs"),
[