From 4acd56eb53bcbefffb7f911e0099560f3693c8bb Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 20 Nov 2024 18:18:21 +0000 Subject: [PATCH] Upload MPS benchmark results (#141087) This uploads the MPS benchmark results to the benchmark database. The data can then be queried, for example: ``` select benchmark, model, metric from oss_ci_benchmark_v3 where head_sha = '99a133116fee15aa1467165f2b209b37da53f189' and metric.name in ['eager_peak_mem', 'dynamo_peak_mem', 'speedup'] and model.name = 'BERT_pytorch' ``` I'm documenting the JSON format at https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database ### Testing Locally, ``` PYTHONPATH=/Users/huydo/Storage/mine/benchmark python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output test/test-reports/torchbench_training.csv ``` Workflow dispatch https://github.com/pytorch/pytorch/actions/runs/11927990520 Pull Request resolved: https://github.com/pytorch/pytorch/pull/141087 Approved by: https://github.com/malfet --- .github/workflows/_mac-test.yml | 8 ++ benchmarks/dynamo/common.py | 128 ++++++++++++++++++++++++++------ 2 files changed, 115 insertions(+), 21 deletions(-) diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml index cc08c2f0164..c030911caf1 100644 --- a/.github/workflows/_mac-test.yml +++ b/.github/workflows/_mac-test.yml @@ -223,6 +223,14 @@ jobs: use-gha: true file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }} + - name: Upload the benchmark results + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main + with: + benchmark-results-dir: test/test-reports + dry-run: false + schema-version: v3 + github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Clean up disk space if: always() continue-on-error: true diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py 
index 9727b9191dc..c1d299b2e13 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -111,6 +111,11 @@ os.environ["KINETO_LOG_LEVEL"] = "5" current_name = "" current_device = "" +current_backend = "" +current_mode = "" +current_dtype = "" +current_quantization = "" +current_settings = None current_onnx_compiler = "" current_batch_size = None output_filename = None @@ -356,10 +361,19 @@ def load_model_from_path(path_and_class_str): return model, inputs -def output_csv(filename, headers, row): +def write_outputs(filename, headers, row): + """ + Write both CSV and JSON outputs using the original CSV output interface + """ global disable_output if disable_output: return + + output_csv(filename, headers, row) + output_json(filename, headers, row) + + +def output_csv(filename, headers, row): if os.path.exists(filename): with open(filename) as fd: lines = list(csv.reader(fd)) or [[]] @@ -377,6 +391,56 @@ def output_csv(filename, headers, row): writer.writerow(list(line) + ["0"] * (len(headers) - len(line))) +def output_json(filename, headers, row): + """ + Write the result into JSON format, so that it can be uploaded to the benchmark database + to be displayed on OSS dashboard. 
The JSON format is defined at + https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database + """ + origin = "" + if "torchbench" in filename: + origin = "torchbench" + elif "huggingface" in filename: + origin = "huggingface" + elif "timm_models" in filename: + origin = "timm_models" + + extra_info = { + "device": current_device, + "quantization": current_quantization, + "batch_size": current_batch_size, + } + if current_settings: + extra_info.update(current_settings) + + mapping_headers = {headers[i]: v for i, v in enumerate(row)} + with open(f"{os.path.splitext(filename)[0]}.json", "a") as f: + for header, value in mapping_headers.items(): + # These headers are not metric names + if header in ("dev", "name", "batch_size"): + continue + + record = { + "benchmark": { + "name": "TorchInductor", + "mode": current_mode, + "dtype": current_dtype, + "extra_info": extra_info, + }, + "model": { + "name": current_name, + "type": "OSS model", + "backend": current_backend, + "origins": [origin], + }, + "metric": { + "name": header, + "benchmark_values": [value], + }, + } + print(json.dumps(record), file=f) + + def get_suite_from_model_iter_fn(model_iter_fn): # TODO: This is a bit of a hack suite = None @@ -729,7 +793,7 @@ def coverage_experiment(args, model_iter_fn, model, example_inputs): with profiler.prof: frozen_model_iter_fn(model, example_inputs) coverage_result = profiler.results() - output_csv( + write_outputs( output_filename, ( "dev", @@ -768,7 +832,7 @@ def recompile_profiler_experiment(args, model_iter_fn, model, example_inputs): model_iter_fn ) opt_model_iter_fn(model, example_inputs) - output_csv( + write_outputs( output_filename, ["model", "profiler report"], [current_name, prof.report()] ) met = prof.get_metrics() @@ -923,7 +987,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs): for k, v in kwargs["dynamo_stats"].items(): headers.append(k) row.append(v) - output_csv( + write_outputs( 
output_filename, headers, row, @@ -932,7 +996,7 @@ def latency_experiment_summary(suite_name, args, model, timings, **kwargs): assert ( output_filename.find(".csv") > 0 ), f"expected output_filename to be a .csv, but got {output_filename}" - output_csv( + write_outputs( output_filename[:-4] + "_compilation_metrics.csv", first_headers + c_headers, first_fields + c_data, @@ -1092,7 +1156,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs): for k, v in kwargs["dynamo_stats"].items(): headers.append(k) row.append(v) - output_csv( + write_outputs( output_filename, headers, row, @@ -1101,7 +1165,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs): assert ( output_filename.find(".csv") > 0 ), f"expected output_filename to be a .csv, but got {output_filename}" - output_csv( + write_outputs( output_filename[:-4] + "_compilation_metrics.csv", first_headers + c_headers, first_fields + c_data, @@ -1177,7 +1241,7 @@ def speedup_experiment_ds(args, model_iter_fn, model, example_inputs): ] ) ) - output_csv( + write_outputs( output_filename, ("dev", "name", "batch_size", "speedup mean", "speedup median", "speedup var"), [ @@ -1339,7 +1403,7 @@ def speedup_experiment_onnx( row.append(kwargs["compilation_latency"]) row.append(kwargs["compression_ratio"]) - output_csv( + write_outputs( output_filename, headers, row, @@ -1348,7 +1412,7 @@ def speedup_experiment_onnx( assert ( output_filename.find(".csv") > 0 ), f"expected output_filename to be a .csv, but got {output_filename}" - output_csv( + write_outputs( output_filename[:-4] + "_compilation_metrics.csv", ["dev", "name", "batch_size"] + headers, [current_device, current_name, current_batch_size] + data, @@ -1422,7 +1486,7 @@ def baselines(models, model_iter_fn, example_inputs, args): for s, p, m in zip(speedup, pvalue, [m for n, m in models[1:]]) ] ) - output_csv( + write_outputs( output_filename, ("dev", "name", "batch_size") + tuple(n for n, m in models[1:]), 
[current_device, current_name, current_batch_size] @@ -1449,7 +1513,7 @@ def xla(args, model_iter_fn, model, example_inputs): pvalue = ttest_ind(timings[:, 0], timings[:, 1]).pvalue time_baseline, time_xla = np.median(timings, axis=0) speedup = time_baseline / time_xla - output_csv( + write_outputs( output_filename, ("dev", "name", "batch_size", "speedup", "time_baseline", "time_xla"), [ @@ -2245,7 +2309,7 @@ def optimize_onnx_ctx( # `torch.onnx.dynamo_export` raises error that encloses diagnostics. diagnostic_context = e.onnx_program.diagnostic_context for parsed_error in parser.parse_diagnostic_context(diagnostic_context): - output_csv( + write_outputs( output_error_filename, parsed_error.headers, parsed_error.row ) if context.onnx_model is not None: @@ -2261,7 +2325,7 @@ def optimize_onnx_ctx( cause_of_exception, diagnostics.RuntimeErrorWithDiagnostic ): parsed_error = parser.parse_exception(cause_of_exception) - output_csv( + write_outputs( output_error_filename, parsed_error.headers, parsed_error.row ) raise @@ -2269,7 +2333,7 @@ def optimize_onnx_ctx( # `torch.onnx.export` errors. # ORT errors. 
parsed_error = parser.parse_exception(e) - output_csv(output_error_filename, parsed_error.headers, parsed_error.row) + write_outputs(output_error_filename, parsed_error.headers, parsed_error.row) raise run_n_iterations_onnx.context = context @@ -2836,7 +2900,7 @@ class BenchmarkRunner: headers.append(k) fields.append(v) - output_csv(output_filename, headers, fields) + write_outputs(output_filename, headers, fields) output_signpost( dict(zip(o_headers, o_fields)), @@ -3125,7 +3189,7 @@ class BenchmarkRunner: mean.item(), div.item(), ] - output_csv(output_filename, headers, fields) + write_outputs(output_filename, headers, fields) return tolerance_status def run_performance_test_non_alternate( @@ -3588,7 +3652,7 @@ class BenchmarkRunner: user_stack = add_double_quotes( ", ".join([str(x) for x in graph_break.user_stack]) ) - output_csv( + write_outputs( filename, ["model", "reason", "user_stack"], [current_name, reason, user_stack], @@ -4271,7 +4335,7 @@ def write_csv_when_exception(args, name: str, status: str, device=None): rows = [[device, name, placeholder_batch_size, 0.0] for device in devices] for row in rows: - output_csv(output_filename, headers, row) + write_outputs(output_filename, headers, row) def run(runner, args, original_dir=None): @@ -4466,6 +4530,11 @@ def run(runner, args, original_dir=None): current_name, \ current_device, \ current_batch_size, \ + current_backend, \ + current_mode, \ + current_dtype, \ + current_quantization, \ + current_settings, \ output_filename, \ disable_output, \ optimize_ctx, \ @@ -4677,7 +4746,7 @@ def run(runner, args, original_dir=None): for device in args.devices: batch_size = runner.batch_size_finder(device, args.only) print(args.only, batch_size) - output_csv(output_filename, [], [args.only, batch_size]) + write_outputs(output_filename, [], [args.only, batch_size]) return if args.export_profiler_trace: @@ -4818,6 +4887,23 @@ def run(runner, args, original_dir=None): current_name = name current_device = device 
current_batch_size = batch_size + current_backend = args.backend + current_mode = ( + "training" if args.training else "inference" if args.inference else "" + ) + if args.float16: + current_dtype = "float16" + elif args.bfloat16: + current_dtype = "bfloat16" + elif args.float32: + current_dtype = "float32" + elif args.amp: + current_dtype = "amp" + else: + current_dtype = "" + current_quantization = args.quantization + # Keep the remaining of the settings + current_settings = vars(args) set_model_name(name) # Look for stuff that looks like batch size, and mark it dynamic. @@ -4881,7 +4967,7 @@ def run(runner, args, original_dir=None): ) if args.generate_aot_autograd_stats: stats_file = output_filename.split(".csv")[0] + "_stats.csv" - output_csv( + write_outputs( stats_file, ("dev", "name", "batch_size", "total_aot_graphs", "ok_aot_graphs"), [