mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Upload MPS benchmark results (#141087)
This uploads the MPS benchmark results to the benchmark database. The data can then be queried, for example:

```
select benchmark, model, metric from oss_ci_benchmark_v3 where head_sha = '99a133116fee15aa1467165f2b209b37da53f189' and metric.name in ['eager_peak_mem', 'dynamo_peak_mem', 'speedup'] and model.name = 'BERT_pytorch'
```

I'm documenting the JSON format at https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database

### Testing

Locally,

```
PYTHONPATH=/Users/huydo/Storage/mine/benchmark python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output test/test-reports/torchbench_training.csv
```

Workflow dispatch: https://github.com/pytorch/pytorch/actions/runs/11927990520

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141087
Approved by: https://github.com/malfet
This commit is contained in:
parent
1d8318df98
commit
4acd56eb53
2 changed files with 115 additions and 21 deletions
8
.github/workflows/_mac-test.yml
vendored
8
.github/workflows/_mac-test.yml
vendored
|
|
@ -223,6 +223,14 @@ jobs:
|
|||
use-gha: true
|
||||
file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
|
||||
|
||||
- name: Upload the benchmark results
|
||||
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
|
||||
with:
|
||||
benchmark-results-dir: test/test-reports
|
||||
dry-run: false
|
||||
schema-version: v3
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Clean up disk space
|
||||
if: always()
|
||||
continue-on-error: true
|
||||
|
|
|
|||
|
|
@ -111,6 +111,11 @@ os.environ["KINETO_LOG_LEVEL"] = "5"
|
|||
|
||||
current_name = ""
|
||||
current_device = ""
|
||||
current_backend = ""
|
||||
current_mode = ""
|
||||
current_dtype = ""
|
||||
current_quantization = ""
|
||||
current_settings = None
|
||||
current_onnx_compiler = ""
|
||||
current_batch_size = None
|
||||
output_filename = None
|
||||
|
|
@ -356,10 +361,19 @@ def load_model_from_path(path_and_class_str):
|
|||
return model, inputs
|
||||
|
||||
|
||||
def output_csv(filename, headers, row):
|
||||
def write_outputs(filename, headers, row):
    """
    Emit a single result row in every supported format (CSV, then JSON) while
    keeping the call signature of the original CSV writer.

    Nothing is written when the module-level ``disable_output`` flag is set.
    """
    if not disable_output:
        output_csv(filename, headers, row)
        output_json(filename, headers, row)
|
||||
|
||||
|
||||
def output_csv(filename, headers, row):
|
||||
if os.path.exists(filename):
|
||||
with open(filename) as fd:
|
||||
lines = list(csv.reader(fd)) or [[]]
|
||||
|
|
@ -377,6 +391,56 @@ def output_csv(filename, headers, row):
|
|||
writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
|
||||
|
||||
|
||||
def output_json(filename, headers, row):
    """
    Write the result into JSON format, so that it can be uploaded to the benchmark database
    to be displayed on OSS dashboard. The JSON format is defined at
    https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database

    One JSON record per metric is appended, one per line, to a file named after
    ``filename`` with its extension replaced by ``.json``.

    Args:
        filename: Path of the CSV output file. It is also searched for the
            substrings "torchbench", "huggingface" and "timm_models" (in that
            order) to fill in the model's origin.
        headers: Column names, paired positionally with ``row``.
        row: Column values of a single result.

    ``headers`` and ``row`` may differ in length (some callers pass an empty
    ``headers``); only the columns present in both are recorded.
    """
    origin = ""
    for suite in ("torchbench", "huggingface", "timm_models"):
        if suite in filename:
            origin = suite
            break

    extra_info = {
        "device": current_device,
        "quantization": current_quantization,
        "batch_size": current_batch_size,
    }
    if current_settings:
        extra_info.update(current_settings)

    # zip() stops at the shorter sequence, so a row that is longer than the
    # headers (e.g. headers == []) no longer raises IndexError.
    mapping_headers = dict(zip(headers, row))
    with open(f"{os.path.splitext(filename)[0]}.json", "a") as f:
        for header, value in mapping_headers.items():
            # These headers are not metric names
            if header in ("dev", "name", "batch_size"):
                continue

            record = {
                "benchmark": {
                    "name": "TorchInductor",
                    "mode": current_mode,
                    "dtype": current_dtype,
                    "extra_info": extra_info,
                },
                "model": {
                    "name": current_name,
                    "type": "OSS model",
                    "backend": current_backend,
                    "origins": [origin],
                },
                "metric": {
                    "name": header,
                    "benchmark_values": [value],
                },
            }
            print(json.dumps(record), file=f)
|
||||
|
||||
|
||||
def get_suite_from_model_iter_fn(model_iter_fn):
|
||||
# TODO: This is a bit of a hack
|
||||
suite = None
|
||||
|
|
@ -729,7 +793,7 @@ def coverage_experiment(args, model_iter_fn, model, example_inputs):
|
|||
with profiler.prof:
|
||||
frozen_model_iter_fn(model, example_inputs)
|
||||
coverage_result = profiler.results()
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
(
|
||||
"dev",
|
||||
|
|
@ -768,7 +832,7 @@ def recompile_profiler_experiment(args, model_iter_fn, model, example_inputs):
|
|||
model_iter_fn
|
||||
)
|
||||
opt_model_iter_fn(model, example_inputs)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename, ["model", "profiler report"], [current_name, prof.report()]
|
||||
)
|
||||
met = prof.get_metrics()
|
||||
|
|
@ -923,7 +987,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs):
|
|||
for k, v in kwargs["dynamo_stats"].items():
|
||||
headers.append(k)
|
||||
row.append(v)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
headers,
|
||||
row,
|
||||
|
|
@ -932,7 +996,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs):
|
|||
assert (
|
||||
output_filename.find(".csv") > 0
|
||||
), f"expected output_filename to be a .csv, but got {output_filename}"
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename[:-4] + "_compilation_metrics.csv",
|
||||
first_headers + c_headers,
|
||||
first_fields + c_data,
|
||||
|
|
@ -1092,7 +1156,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
|
|||
for k, v in kwargs["dynamo_stats"].items():
|
||||
headers.append(k)
|
||||
row.append(v)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
headers,
|
||||
row,
|
||||
|
|
@ -1101,7 +1165,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
|
|||
assert (
|
||||
output_filename.find(".csv") > 0
|
||||
), f"expected output_filename to be a .csv, but got {output_filename}"
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename[:-4] + "_compilation_metrics.csv",
|
||||
first_headers + c_headers,
|
||||
first_fields + c_data,
|
||||
|
|
@ -1177,7 +1241,7 @@ def speedup_experiment_ds(args, model_iter_fn, model, example_inputs):
|
|||
]
|
||||
)
|
||||
)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
("dev", "name", "batch_size", "speedup mean", "speedup median", "speedup var"),
|
||||
[
|
||||
|
|
@ -1339,7 +1403,7 @@ def speedup_experiment_onnx(
|
|||
row.append(kwargs["compilation_latency"])
|
||||
row.append(kwargs["compression_ratio"])
|
||||
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
headers,
|
||||
row,
|
||||
|
|
@ -1348,7 +1412,7 @@ def speedup_experiment_onnx(
|
|||
assert (
|
||||
output_filename.find(".csv") > 0
|
||||
), f"expected output_filename to be a .csv, but got {output_filename}"
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename[:-4] + "_compilation_metrics.csv",
|
||||
["dev", "name", "batch_size"] + headers,
|
||||
[current_device, current_name, current_batch_size] + data,
|
||||
|
|
@ -1422,7 +1486,7 @@ def baselines(models, model_iter_fn, example_inputs, args):
|
|||
for s, p, m in zip(speedup, pvalue, [m for n, m in models[1:]])
|
||||
]
|
||||
)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
("dev", "name", "batch_size") + tuple(n for n, m in models[1:]),
|
||||
[current_device, current_name, current_batch_size]
|
||||
|
|
@ -1449,7 +1513,7 @@ def xla(args, model_iter_fn, model, example_inputs):
|
|||
pvalue = ttest_ind(timings[:, 0], timings[:, 1]).pvalue
|
||||
time_baseline, time_xla = np.median(timings, axis=0)
|
||||
speedup = time_baseline / time_xla
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_filename,
|
||||
("dev", "name", "batch_size", "speedup", "time_baseline", "time_xla"),
|
||||
[
|
||||
|
|
@ -2245,7 +2309,7 @@ def optimize_onnx_ctx(
|
|||
# `torch.onnx.dynamo_export` raises error that encloses diagnostics.
|
||||
diagnostic_context = e.onnx_program.diagnostic_context
|
||||
for parsed_error in parser.parse_diagnostic_context(diagnostic_context):
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_error_filename, parsed_error.headers, parsed_error.row
|
||||
)
|
||||
if context.onnx_model is not None:
|
||||
|
|
@ -2261,7 +2325,7 @@ def optimize_onnx_ctx(
|
|||
cause_of_exception, diagnostics.RuntimeErrorWithDiagnostic
|
||||
):
|
||||
parsed_error = parser.parse_exception(cause_of_exception)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
output_error_filename, parsed_error.headers, parsed_error.row
|
||||
)
|
||||
raise
|
||||
|
|
@ -2269,7 +2333,7 @@ def optimize_onnx_ctx(
|
|||
# `torch.onnx.export` errors.
|
||||
# ORT errors.
|
||||
parsed_error = parser.parse_exception(e)
|
||||
output_csv(output_error_filename, parsed_error.headers, parsed_error.row)
|
||||
write_outputs(output_error_filename, parsed_error.headers, parsed_error.row)
|
||||
raise
|
||||
|
||||
run_n_iterations_onnx.context = context
|
||||
|
|
@ -2836,7 +2900,7 @@ class BenchmarkRunner:
|
|||
headers.append(k)
|
||||
fields.append(v)
|
||||
|
||||
output_csv(output_filename, headers, fields)
|
||||
write_outputs(output_filename, headers, fields)
|
||||
|
||||
output_signpost(
|
||||
dict(zip(o_headers, o_fields)),
|
||||
|
|
@ -3125,7 +3189,7 @@ class BenchmarkRunner:
|
|||
mean.item(),
|
||||
div.item(),
|
||||
]
|
||||
output_csv(output_filename, headers, fields)
|
||||
write_outputs(output_filename, headers, fields)
|
||||
return tolerance_status
|
||||
|
||||
def run_performance_test_non_alternate(
|
||||
|
|
@ -3588,7 +3652,7 @@ class BenchmarkRunner:
|
|||
user_stack = add_double_quotes(
|
||||
", ".join([str(x) for x in graph_break.user_stack])
|
||||
)
|
||||
output_csv(
|
||||
write_outputs(
|
||||
filename,
|
||||
["model", "reason", "user_stack"],
|
||||
[current_name, reason, user_stack],
|
||||
|
|
@ -4271,7 +4335,7 @@ def write_csv_when_exception(args, name: str, status: str, device=None):
|
|||
rows = [[device, name, placeholder_batch_size, 0.0] for device in devices]
|
||||
|
||||
for row in rows:
|
||||
output_csv(output_filename, headers, row)
|
||||
write_outputs(output_filename, headers, row)
|
||||
|
||||
|
||||
def run(runner, args, original_dir=None):
|
||||
|
|
@ -4466,6 +4530,11 @@ def run(runner, args, original_dir=None):
|
|||
current_name, \
|
||||
current_device, \
|
||||
current_batch_size, \
|
||||
current_backend, \
|
||||
current_mode, \
|
||||
current_dtype, \
|
||||
current_quantization, \
|
||||
current_settings, \
|
||||
output_filename, \
|
||||
disable_output, \
|
||||
optimize_ctx, \
|
||||
|
|
@ -4677,7 +4746,7 @@ def run(runner, args, original_dir=None):
|
|||
for device in args.devices:
|
||||
batch_size = runner.batch_size_finder(device, args.only)
|
||||
print(args.only, batch_size)
|
||||
output_csv(output_filename, [], [args.only, batch_size])
|
||||
write_outputs(output_filename, [], [args.only, batch_size])
|
||||
return
|
||||
|
||||
if args.export_profiler_trace:
|
||||
|
|
@ -4818,6 +4887,23 @@ def run(runner, args, original_dir=None):
|
|||
current_name = name
|
||||
current_device = device
|
||||
current_batch_size = batch_size
|
||||
current_backend = args.backend
|
||||
current_mode = (
|
||||
"training" if args.training else "inference" if args.inference else ""
|
||||
)
|
||||
if args.float16:
|
||||
current_dtype = "float16"
|
||||
elif args.bfloat16:
|
||||
current_dtype = "bfloat16"
|
||||
elif args.float32:
|
||||
current_dtype = "float32"
|
||||
elif args.amp:
|
||||
current_dtype = "amp"
|
||||
else:
|
||||
current_dtype = ""
|
||||
current_quantization = args.quantization
|
||||
# Keep the remaining of the settings
|
||||
current_settings = vars(args)
|
||||
set_model_name(name)
|
||||
|
||||
# Look for stuff that looks like batch size, and mark it dynamic.
|
||||
|
|
@ -4881,7 +4967,7 @@ def run(runner, args, original_dir=None):
|
|||
)
|
||||
if args.generate_aot_autograd_stats:
|
||||
stats_file = output_filename.split(".csv")[0] + "_stats.csv"
|
||||
output_csv(
|
||||
write_outputs(
|
||||
stats_file,
|
||||
("dev", "name", "batch_size", "total_aot_graphs", "ok_aot_graphs"),
|
||||
[
|
||||
|
|
|
|||
Loading…
Reference in a new issue