From 93eb9bcfdec2e3204d6a420939a112ee10e4550a Mon Sep 17 00:00:00 2001
From: ytaous <4484531+ytaous@users.noreply.github.com>
Date: Wed, 13 May 2020 14:15:17 -0700
Subject: [PATCH] Add yaml/perf scripts for new perf test pipeline (#3909)

* yaml/perf scripts for new pipeline

* yaml/perf scripts for new pipeline

* remove unused imports

* testing some comments change

* testing some comments change

* testing jdbc

* testing jdbc

* testing jdbc

* exclude pwd from jdbc properties

* exclude pwd from jdbc properties

* namedtuple

* on comments

Co-authored-by: Ethan Tao
---
Notes (review fixes folded into this patch; text in this section is ignored
by git am):
 - run_bert_perf_test.py: drop the stray trailing commas after the
   cmds.append(...) calls (each one built and discarded a 1-tuple), use
   subprocess.run(..., check=True) instead of a separate check_returncode()
   call, and add docstrings.
 - JdbcUtil.java: do not add the unused java.util.Map import; fail with a
   clear message when password_env, or the environment variable it names,
   is missing instead of hitting a bare NullPointerException / null password.
 - The "index" lines of these two files still carry the original blob ids.

 .../models/runner/training_runner.cc          |  2 +
 .../tools/ci_test/run_bert_perf_test.py       | 78 +++++++++++++++++++
 ...aining-linux-gpu-perf-test-ci-pipeline.yml | 59 ++++++++++++++
 .../java/com/msft/send_perf_metrics/App.java  |  3 +-
 .../com/msft/send_perf_metrics/JdbcUtil.java  | 12 +++-
 5 files changed, 151 insertions(+), 3 deletions(-)
 create mode 100644 orttraining/tools/ci_test/run_bert_perf_test.py
 create mode 100644 tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml

diff --git a/orttraining/orttraining/models/runner/training_runner.cc b/orttraining/orttraining/models/runner/training_runner.cc
index dc408c9440..b638a22f3e 100644
--- a/orttraining/orttraining/models/runner/training_runner.cc
+++ b/orttraining/orttraining/models/runner/training_runner.cc
@@ -820,6 +820,8 @@ Status TrainingRunner::SavePerfMetrics(const size_t number_of_batches, const siz
   perf_metrics_stream.open(perf_metrics_path, std::ios::out | std::ios::trunc);
   ORT_RETURN_IF_NOT(perf_metrics_stream << json_string << "\n", "Failed to write to output file.");
 
+  std::cout << "\n\nSaved perf metrics file: " << ToMBString(perf_metrics_path) << "\n\n";
+
   return Status::OK();
 }
 
diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py
new file mode 100644
index 0000000000..4290d9edc9
--- /dev/null
+++ b/orttraining/tools/ci_test/run_bert_perf_test.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import argparse
+import subprocess
+import sys
+import os
+from collections import namedtuple
+
+SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+
+def parse_args():
+    """Parse the required --binary_dir, --training_data_root and --model_root options."""
+    parser = argparse.ArgumentParser(description="Runs BERT performance tests.")
+    parser.add_argument("--binary_dir", required=True,
+                        help="Path to the ORT binary directory.")
+    parser.add_argument("--training_data_root", required=True,
+                        help="Path to the training data root directory.")
+    parser.add_argument("--model_root", required=True,
+                        help="Path to the model root directory.")
+    return parser.parse_args()
+
+def main():
+    """Run onnxruntime_training_bert once per perf configuration.
+
+    The configurations use the same params as "GitHub Master Merge Schedule"
+    in OneNote. Raises subprocess.CalledProcessError as soon as a run exits
+    with a non-zero code; returns 0 when every run succeeds.
+    """
+    args = parse_args()
+
+    Config = namedtuple('Config', ['use_mixed_precision', 'max_seq_length', 'batch_size', 'max_predictions_per_seq'])
+    configs = [
+        Config(True, 128, 66, 20),
+        Config(True, 512, 10, 80),
+        Config(False, 128, 33, 20),
+        Config(False, 512, 5, 80)
+    ]
+
+    # run BERT training
+    for c in configs:
+        print("######## testing name - " + ('fp16-' if c.use_mixed_precision else 'fp32-') + str(c.max_seq_length) + " ##############")
+        cmds = [
+            os.path.join(args.binary_dir, "onnxruntime_training_bert"),
+            "--model_name", os.path.join(
+                args.model_root, "nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm"),
+            "--train_data_dir", os.path.join(
+                args.training_data_root, str(c.max_seq_length), "books_wiki_en_corpus/train"),
+            "--test_data_dir", os.path.join(
+                args.training_data_root, str(c.max_seq_length), "books_wiki_en_corpus/test"),
+            "--train_batch_size", str(c.batch_size),
+            "--mode", "train",
+            "--max_seq_length", str(c.max_seq_length),
+            "--num_train_steps", "100",
+            "--display_loss_steps", "5",
+            "--optimizer", "Lamb",
+            "--learning_rate", "3e-3",
+            "--warmup_ratio", "0.2843",
+            "--warmup_mode", "Poly",
+            "--gradient_accumulation_steps", "1",
+            "--max_predictions_per_seq", str(c.max_predictions_per_seq),
+            "--lambda", "0",
+            "--use_nccl",
+            "--perf_output_dir", os.path.join(SCRIPT_DIR, "results"),
+        ]
+
+        if c.use_mixed_precision:
+            cmds.append("--use_mixed_precision")
+            cmds.append("--allreduce_in_fp16")
+
+        # check=True raises CalledProcessError on a non-zero exit code
+        subprocess.run(cmds, check=True)
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml
new file mode 100644
index 0000000000..4b9fa47de8
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml
@@ -0,0 +1,59 @@
+trigger: none
+
+jobs:
+- job: Onnxruntime_Linux_GPU_Training_Perf_Test
+
+  timeoutInMinutes: 120
+
+  variables:
+  - group: 'ortperf' # variable group
+
+  steps:
+  - checkout: self
+    clean: true
+    submodules: recursive
+
+  - script: >
+      tools/ci_build/github/linux/run_dockerbuild.sh
+      -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory)
+      -x "
+      --config RelWithDebInfo
+      --enable_training
+      --update --build
+      "
+    displayName: 'Build performance tests'
+
+  - script: >
+      docker run --gpus all --rm --name onnxruntime-gpu-perf
+      --volume $(Build.SourcesDirectory):/onnxruntime_src
+      --volume $(Build.BinariesDirectory):/build
+      --volume /bert_ort/bert_models:/build/bert_models:ro
+      --volume /bert_data:/build/bert_data:ro
+      -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6
+      /usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_bert_perf_test.py
+      --binary_dir /build/RelWithDebInfo
+      --training_data_root /build/bert_data
+      --model_root /build/bert_models
+    displayName: 'Run bert performance tests'
+
+  # generate jdbc.properties
+  - script: >
+      mkdir -p $(Build.SourcesDirectory)/tools/perf_util/src/main/resources &&
+      printf "url=jdbc:mysql://onnxruntimedashboard.mysql.database.azure.com/onnxruntime?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8\nuser=powerbi@onnxruntimedashboard\npassword_env=ORT_PERF_PASSWORD"
+      > $(Build.SourcesDirectory)/tools/perf_util/src/main/resources/jdbc.properties
+    displayName: 'Create resource file'
+
+  - script: >
+      mvn package
+    displayName: 'Maven build'
+    workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
+
+  # process json files
+  - script: >
+      java -cp target/send_perf_metrics-0.0.1-SNAPSHOT-jar-with-dependencies.jar com.msft.send_perf_metrics.App "$(Build.SourcesDirectory)/orttraining/tools/ci_test/results"
+    env:
+      ORT_PERF_PASSWORD: $(ortperf)
+    displayName: 'Populate perf metrics'
+    workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
+
+  - template: templates/clean-agent-build-directory-step.yml
diff --git a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java
index 4948b785d8..80b20535d7 100644
--- a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java
+++ b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java
@@ -51,12 +51,12 @@ public class App {
 
                 if (!filename.startsWith(".") && filename.endsWith(".json")) {
                     perf_metrics.add(file);
+                    System.out.println(filename);
                 }
 
                 return FileVisitResult.CONTINUE;
             }
         });
-        System.out.println(perf_metrics);
 
         final Path cwd_dir = Paths.get(System.getProperty("user.dir"));
         // git rev-parse HEAD
@@ -78,6 +78,7 @@ public class App {
             String batch_id) throws Exception {
         try {
             Connection conn = JdbcUtil.GetConn();
+            System.out.println("MySQL DB connection established.\n");
             // go thru each json file
             JSONParser jsonParser = new JSONParser();
             for (Path metrics_json : perf_metrics) {
diff --git a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java
index da88f5fe64..58cedbeee5 100644
--- a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java
+++ b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java
@@ -1,17 +1,25 @@
 package com.msft.send_perf_metrics;
 
 import java.sql.DriverManager;
 import java.util.Properties;
 
 public class JdbcUtil {
     static java.sql.Connection GetConn() throws Exception {
         try (java.io.InputStream in = App.class.getResourceAsStream("/jdbc.properties")) {
             if (in == null)
-                throw new RuntimeException("err");
+                throw new RuntimeException("Error reading jdbc properties");
             Properties props = new Properties();
             props.load(in);
+            // the password itself is never stored in jdbc.properties; the file only
+            // names the environment variable that carries it
+            String passwordEnv = props.getProperty("password_env");
+            if (passwordEnv == null)
+                throw new RuntimeException("password_env is missing from jdbc properties");
+            String password = System.getenv(passwordEnv);
+            if (password == null)
+                throw new RuntimeException("Environment variable " + passwordEnv + " is not set");
             return DriverManager.getConnection(props.getProperty("url"), props.getProperty("user"),
-                    props.getProperty("password"));
+                    password);
         }
     }
 }