onnxruntime/orttraining/tools/ci_test/run_bert_perf_test.py
Justin Chu faea42af95
Bump ruff to 0.3.2 and black to 24 (#19878)
### Motivation and Context

Routing updates
2024-03-13 10:00:32 -07:00

113 lines
3.7 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import argparse
import json
import os
import subprocess
import sys
from collections import namedtuple
# Canonical (symlink-resolved, absolute) path of the directory that holds this
# script; main() passes "<SCRIPT_DIR>/results" as the perf output directory
# and reads the metrics JSON back from there.
SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
def parse_args():
    """Parse the command-line options of the BERT performance test runner.

    Returns:
        argparse.Namespace with the required ``binary_dir``,
        ``training_data_root`` and ``model_root`` options, plus ``gpu_sku``
        (one of ``V100_16G`` / ``MI100_32G``, defaulting to ``V100_16G``).
    """
    arg_parser = argparse.ArgumentParser(description="Runs BERT performance tests.")

    # The three mandatory path options differ only in flag name and help text.
    required_paths = (
        ("--binary_dir", "Path to the ORT binary directory."),
        ("--training_data_root", "Path to the training data root directory."),
        ("--model_root", "Path to the model root directory."),
    )
    for flag, help_text in required_paths:
        arg_parser.add_argument(flag, required=True, help=help_text)

    supported_skus = ["V100_16G", "MI100_32G"]
    arg_parser.add_argument(
        "--gpu_sku",
        choices=supported_skus,
        default=supported_skus[0],
        required=False,
        help="GPU model (e.g. V100_16G, MI100_32G).",
    )

    return arg_parser.parse_args()
# Uses the same parameters as the "GitHub Master Merge Schedule" in OneNote.
def main():
    """Run the BERT training benchmark for every configuration of the selected GPU SKU.

    For each configuration the ``onnxruntime_training_bert`` binary found in
    ``--binary_dir`` is launched. When a configuration carries a positive
    ``expected_perf``, the metrics JSON under ``<SCRIPT_DIR>/results`` (the
    directory passed to the binary as ``--perf_output_dir``) is loaded and its
    ``EndToEndThroughput`` must be above 98% of that expected value.

    Returns:
        0 once every run and every throughput check has succeeded.

    Raises:
        subprocess.CalledProcessError: the training binary exited with a
            non-zero status.
        AssertionError: the measured throughput is not above 98% of the
            expected value.
    """
    args = parse_args()

    Config = namedtuple(
        "Config", ["use_mixed_precision", "max_seq_length", "batch_size", "max_predictions_per_seq", "expected_perf"]
    )

    # An expected_perf that is not positive (e.g. -1.0) disables the throughput check.
    configs = {
        "V100_16G": [
            Config(True, 128, 76, 20, -1.0),
            Config(True, 512, 11, 80, -1.0),
            Config(False, 128, 39, 20, -1.0),
            Config(False, 512, 6, 80, -1.0),
        ],
        "MI100_32G": [
            Config(True, 128, 128, 20, 240),
        ],
    }

    # Loop-invariant values: every configuration uses the same model and results directory.
    model = "bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12"
    results_dir = os.path.join(SCRIPT_DIR, "results")

    # run BERT training
    for c in configs[args.gpu_sku]:
        precision_prefix = "fp16" if c.use_mixed_precision else "fp32"

        # Flush so the banner is emitted before the child process writes its
        # own output, even when stdout is a pipe and therefore block-buffered.
        print(f"######## testing name - {precision_prefix}-{c.max_seq_length} ##############", flush=True)

        cmds = [
            os.path.join(args.binary_dir, "onnxruntime_training_bert"),
            "--model_name",
            os.path.join(args.model_root, f"nv/bert-large/{model}"),
            "--train_data_dir",
            os.path.join(args.training_data_root, str(c.max_seq_length), "books_wiki_en_corpus/train"),
            "--test_data_dir",
            os.path.join(args.training_data_root, str(c.max_seq_length), "books_wiki_en_corpus/test"),
            "--train_batch_size",
            str(c.batch_size),
            "--mode",
            "train",
            "--max_seq_length",
            str(c.max_seq_length),
            "--num_train_steps",
            "640",
            "--display_loss_steps",
            "5",
            "--optimizer",
            "Lamb",
            "--learning_rate",
            "3e-3",
            "--warmup_ratio",
            "0.2843",
            "--warmup_mode",
            "Poly",
            "--gradient_accumulation_steps",
            "1",
            "--max_predictions_per_seq",
            str(c.max_predictions_per_seq),
            "--lambda",
            "0",
            "--use_nccl",
            "--perf_output_dir",
            results_dir,
        ]

        if c.use_mixed_precision:
            cmds.extend(["--use_mixed_precision", "--allreduce_in_fp16"])

        # check=True raises CalledProcessError on a non-zero exit status.
        subprocess.run(cmds, check=True)

        if c.expected_perf > 0.0:
            json_filename = (
                f"onnxruntime_perf_metrics_{model}.onnx_bert_{precision_prefix}_{c.max_seq_length}_Lamb.json"
            )
            with open(os.path.join(results_dir, json_filename)) as json_file:
                results = json.load(json_file)

            throughput = results["EndToEndThroughput"]
            min_throughput = 0.98 * c.expected_perf
            # Raised explicitly instead of using an `assert` statement so the
            # perf gate is still enforced when Python runs with -O.
            if not throughput > min_throughput:
                raise AssertionError(
                    f"EndToEndThroughput {throughput} is not above {min_throughput} "
                    f"(98% of expected {c.expected_perf}) for {precision_prefix}-{c.max_seq_length}"
                )

    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)