mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-01 03:45:06 +00:00
Add yaml/perf scripts for new perf test pipeline (#3909)
* yaml/perf scripts for new pipeline * yaml/perf scripts for new pipeline * remove unused imports * testing some comments change * testing some comments change * testing jdbc * testing jdbc * testing jdbc * exclude pwd from jdbc properties * exclude pwd from jdbc properties * namedtuple * on comments Co-authored-by: Ethan Tao <ettao@microsoft.com>
This commit is contained in:
parent
e86214e5c0
commit
93eb9bcfde
5 changed files with 138 additions and 3 deletions
|
|
@ -820,6 +820,8 @@ Status TrainingRunner::SavePerfMetrics(const size_t number_of_batches, const siz
|
|||
perf_metrics_stream.open(perf_metrics_path, std::ios::out | std::ios::trunc);
|
||||
ORT_RETURN_IF_NOT(perf_metrics_stream << json_string << "\n", "Failed to write to output file.");
|
||||
|
||||
std::cout << "\n\nSaved perf metrics file: " << ToMBString(perf_metrics_path) << "\n\n";
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
|||
71
orttraining/tools/ci_test/run_bert_perf_test.py
Normal file
71
orttraining/tools/ci_test/run_bert_perf_test.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
# Symlink-resolved directory containing this script; main() passes its
# "results" subdirectory to the training binary as --perf_output_dir.
SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options for the BERT performance test run.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behavior of the original zero-argument call.

    Returns:
        An ``argparse.Namespace`` with the required attributes
        ``binary_dir``, ``training_data_root`` and ``model_root``.
    """
    parser = argparse.ArgumentParser(description="Runs BERT performance tests.")
    parser.add_argument("--binary_dir", required=True,
                        help="Path to the ORT binary directory.")
    parser.add_argument("--training_data_root", required=True,
                        help="Path to the training data root directory.")
    parser.add_argument("--model_root", required=True,
                        help="Path to the model root directory.")
    return parser.parse_args(argv)
|
||||
|
||||
# Uses the same parameters as "GitHub Master Merge Schedule" in OneNote.
|
||||
def _build_bert_command(args, config, perf_output_dir):
    """Return the argument list for one onnxruntime_training_bert run.

    Args:
        args: Parsed options providing ``binary_dir``, ``model_root`` and
            ``training_data_root``.
        config: A ``Config`` tuple with ``use_mixed_precision``,
            ``max_seq_length``, ``batch_size`` and ``max_predictions_per_seq``.
        perf_output_dir: Directory passed to the binary as ``--perf_output_dir``.
    """
    seq_length = str(config.max_seq_length)
    command = [
        os.path.join(args.binary_dir, "onnxruntime_training_bert"),
        "--model_name", os.path.join(
            args.model_root,
            "nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm"),
        # Train/test data live under a per-sequence-length subdirectory.
        "--train_data_dir", os.path.join(
            args.training_data_root, seq_length, "books_wiki_en_corpus/train"),
        "--test_data_dir", os.path.join(
            args.training_data_root, seq_length, "books_wiki_en_corpus/test"),
        "--train_batch_size", str(config.batch_size),
        "--mode", "train",
        "--max_seq_length", seq_length,
        "--num_train_steps", "100",
        "--display_loss_steps", "5",
        "--optimizer", "Lamb",
        "--learning_rate", "3e-3",
        "--warmup_ratio", "0.2843",
        "--warmup_mode", "Poly",
        "--gradient_accumulation_steps", "1",
        "--max_predictions_per_seq", str(config.max_predictions_per_seq),
        "--lambda", "0",
        "--use_nccl",
        "--perf_output_dir", perf_output_dir,
    ]

    if config.use_mixed_precision:
        command.append("--use_mixed_precision")
        command.append("--allreduce_in_fp16")

    return command


def main():
    """Run the BERT training binary once for each perf configuration.

    The configurations cover mixed-precision and full-precision runs at
    sequence lengths 128 and 512. Each run is given the "results" directory
    next to this script as its ``--perf_output_dir``.

    Returns:
        0 when every run exits successfully.

    Raises:
        subprocess.CalledProcessError: If any run exits with a non-zero
            status; remaining configurations are not executed.
    """
    args = parse_args()

    Config = namedtuple('Config', ['use_mixed_precision', 'max_seq_length', 'batch_size', 'max_predictions_per_seq'])
    configs = [
        Config(True, 128, 66, 20),
        Config(True, 512, 10, 80),
        Config(False, 128, 33, 20),
        Config(False, 512, 5, 80),
    ]

    # Same for every configuration, so compute it once outside the loop.
    perf_output_dir = os.path.join(SCRIPT_DIR, "results")

    # run BERT training
    for c in configs:
        precision = 'fp16-' if c.use_mixed_precision else 'fp32-'
        print("######## testing name - " + precision + str(c.max_seq_length) + " ##############")

        # check=True raises CalledProcessError on a non-zero exit status.
        subprocess.run(_build_bert_command(args, c, perf_output_dir), check=True)

    return 0
|
||||
|
||||
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
trigger: none
|
||||
|
||||
jobs:
|
||||
- job: Onnxruntime_Linux_GPU_Training_Perf_Test
|
||||
|
||||
timeoutInMinutes: 120
|
||||
|
||||
variables:
|
||||
- group: 'ortperf' # variable group
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
submodules: recursive
|
||||
|
||||
- script: >
|
||||
tools/ci_build/github/linux/run_dockerbuild.sh
|
||||
-o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory)
|
||||
-x "
|
||||
--config RelWithDebInfo
|
||||
--enable_training
|
||||
--update --build
|
||||
"
|
||||
displayName: 'Build performance tests'
|
||||
|
||||
- script: >
|
||||
docker run --gpus all --rm --name onnxruntime-gpu-perf
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src
|
||||
--volume $(Build.BinariesDirectory):/build
|
||||
--volume /bert_ort/bert_models:/build/bert_models:ro
|
||||
--volume /bert_data:/build/bert_data:ro
|
||||
-e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6
|
||||
/usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_bert_perf_test.py
|
||||
--binary_dir /build/RelWithDebInfo
|
||||
--training_data_root /build/bert_data
|
||||
--model_root /build/bert_models
|
||||
displayName: 'Run bert performance tests'
|
||||
|
||||
# generate jdbc.properties
|
||||
- script: >
|
||||
mkdir -p $(Build.SourcesDirectory)/tools/perf_util/src/main/resources &&
|
||||
printf "url=jdbc:mysql://onnxruntimedashboard.mysql.database.azure.com/onnxruntime?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8\nuser=powerbi@onnxruntimedashboard\npassword_env=ORT_PERF_PASSWORD"
|
||||
> $(Build.SourcesDirectory)/tools/perf_util/src/main/resources/jdbc.properties
|
||||
displayName: 'Create resource file'
|
||||
|
||||
- script: >
|
||||
mvn package
|
||||
displayName: 'Maven build'
|
||||
workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
|
||||
|
||||
# process json files
|
||||
- script: >
|
||||
java -cp target/send_perf_metrics-0.0.1-SNAPSHOT-jar-with-dependencies.jar com.msft.send_perf_metrics.App "$(Build.SourcesDirectory)/orttraining/tools/ci_test/results"
|
||||
env:
|
||||
ORT_PERF_PASSWORD: $(ortperf)
|
||||
displayName: 'Populate perf metrics'
|
||||
workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
|
||||
|
||||
- template: templates/clean-agent-build-directory-step.yml
|
||||
|
|
@ -51,12 +51,12 @@ public class App {
|
|||
|
||||
if (!filename.startsWith(".") && filename.endsWith(".json")) {
|
||||
perf_metrics.add(file);
|
||||
System.out.println(filename);
|
||||
}
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
|
||||
});
|
||||
System.out.println(perf_metrics);
|
||||
|
||||
final Path cwd_dir = Paths.get(System.getProperty("user.dir"));
|
||||
// git rev-parse HEAD
|
||||
|
|
@ -78,6 +78,7 @@ public class App {
|
|||
String batch_id) throws Exception {
|
||||
try {
|
||||
Connection conn = JdbcUtil.GetConn();
|
||||
System.out.println("MySQL DB connection established.\n");
|
||||
// go thru each json file
|
||||
JSONParser jsonParser = new JSONParser();
|
||||
for (Path metrics_json : perf_metrics) {
|
||||
|
|
|
|||
|
|
@ -1,17 +1,19 @@
|
|||
package com.msft.send_perf_metrics;
|
||||
|
||||
import java.sql.DriverManager;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
public class JdbcUtil {
|
||||
static java.sql.Connection GetConn() throws Exception {
|
||||
try (java.io.InputStream in = App.class.getResourceAsStream("/jdbc.properties")) {
|
||||
if (in == null)
|
||||
throw new RuntimeException("err");
|
||||
throw new RuntimeException("Error reading jdbc properties");
|
||||
Properties props = new Properties();
|
||||
props.load(in);
|
||||
// loading password via env variable
|
||||
return DriverManager.getConnection(props.getProperty("url"), props.getProperty("user"),
|
||||
props.getProperty("password"));
|
||||
System.getenv(props.getProperty("password_env")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue