mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
GPT-2 training perf scripts (#3974)
* gpt2 training perf * gpt2 training perf * debug * debug * debug * fix bug * minor * on comments * dynamic sql * fix build * minor * linked hash * on comments * minor * mem * minor Co-authored-by: Ethan Tao <ettao@microsoft.com>
This commit is contained in:
parent
36bcb28238
commit
fb4efafc8e
6 changed files with 173 additions and 70 deletions
|
|
@ -432,8 +432,16 @@ float GetLossValue(const Tensor& loss_tensor) {
|
|||
return loss;
|
||||
}
|
||||
|
||||
// mapping of max_sequence_length and max_predictions_per_sequence position derived from training data
|
||||
std::map<std::string, std::pair<std::string, size_t>> input_to_dimension_mapping;
|
||||
// Use this mapping table to define what is stored in mapped_dimensions, and ultimately in the JSON structure.
|
||||
// Be mindful of the position: if it is invalid or out of bounds, the property population process will be
|
||||
// either incorrect or aborted. Also make sure to subtract 1 from the index position to get the valid corresponding value;
|
||||
// namely, in the graph, sequence is at position 1, but in the initial tensor shape vector loaded from training data it is at position 0,
|
||||
// batch is not part of the initial tensor shape vector till later
|
||||
// see GetTensorDimensionsFromInputs() in training_util.h and training_runner.cc for more details
|
||||
const std::map<std::string, std::pair<std::string, size_t>> input_to_dimension_mapping = {
|
||||
{"input1", {"SeqLen", 0}}, // int64[batch,sequence] "sequence" -> "SeqLen", 0
|
||||
{"masked_lm_ids", {"PredictionsPerSeq", 0}} // int64[batch,dynamic_prediction_count]
|
||||
};
|
||||
|
||||
// generic properties for storing perf metrics
|
||||
MapStringToString mapped_dimensions;
|
||||
|
|
@ -516,17 +524,6 @@ void setup_training_params(BertParameters& params) {
|
|||
{"masked_lm_weights", "masked_lm_weights"},
|
||||
{"next_sentence_label", "next_sentence_labels"}};
|
||||
|
||||
// Use this mapping table to define what is stored in mapped_dimensions, and ultimately in the JSON structure.
|
||||
// Be mindful of the position: if it is invalid or out of bounds, the property population process will be
|
||||
// either incorrect or aborted. Also make sure to subtract 1 from the index position to get the valid corresponding value;
|
||||
// namely, in the graph, sequence is at position 1, but in the initial tensor shape vector loaded from training data it is at position 0,
|
||||
// batch is not part of the initial tensor shape vector till later
|
||||
// see GetTensorDimensionsFromInputs() in training_util.h and training_runner.cc for more details
|
||||
input_to_dimension_mapping = {
|
||||
{"input1", {"SeqLen", 0}}, // int64[batch,sequence] "sequence" -> "SeqLen", 0
|
||||
{"masked_lm_ids", {"PredictionsPerSeq", 0}} // int64[batch,dynamic_prediction_count]
|
||||
};
|
||||
|
||||
params.model_type = "bert";
|
||||
|
||||
params.skip_evaluation = params.is_perf_test;
|
||||
|
|
|
|||
|
|
@ -49,9 +49,12 @@ Status ParseArguments(int argc, char* argv[], GPT2Parameters& params, OrtParamet
|
|||
cxxopts::value<std::string>()->default_value("data/1024/books_wiki_en_corpus/test"))
|
||||
("output_dir", "The output directory where the trained model files will be written.",
|
||||
cxxopts::value<std::string>()->default_value(""))
|
||||
("perf_output_dir", "The output directory where the trained perf metrics files will be written.",
|
||||
cxxopts::value<std::string>()->default_value(""))
|
||||
("log_dir", "The directory to write tensorboard events.",
|
||||
cxxopts::value<std::string>()->default_value(""))
|
||||
("train_batch_size", "Total batch size for training.", cxxopts::value<int>())
|
||||
("eval_batch_size", "Total batch size for eval.", cxxopts::value<int>())
|
||||
("learning_rate", "The initial learning rate for the optimizer.", cxxopts::value<float>()->default_value("5e-5"))
|
||||
("num_train_steps", "Total number of training steps to perform.", cxxopts::value<int>()->default_value("100"))
|
||||
("warmup_ratio", "Fraction of training steps for learning rate warmup.", cxxopts::value<float>()->default_value("0"))
|
||||
|
|
@ -119,7 +122,11 @@ Status ParseArguments(int argc, char* argv[], GPT2Parameters& params, OrtParamet
|
|||
|
||||
params.num_train_steps = flags["num_train_steps"].as<int>();
|
||||
params.batch_size = flags["train_batch_size"].as<int>();
|
||||
|
||||
if (flags.count("eval_batch_size")) {
|
||||
params.eval_batch_size = flags["eval_batch_size"].as<int>();
|
||||
} else {
|
||||
params.eval_batch_size = params.batch_size;
|
||||
}
|
||||
params.max_sequence_length = flags["max_seq_length"].as<int>();
|
||||
|
||||
params.gradient_accumulation_steps = flags["gradient_accumulation_steps"].as<int>();
|
||||
|
|
@ -136,6 +143,10 @@ Status ParseArguments(int argc, char* argv[], GPT2Parameters& params, OrtParamet
|
|||
if (params.output_dir.empty()) {
|
||||
printf("No output directory specified. Trained model files will not be saved.\n");
|
||||
}
|
||||
params.perf_output_dir = ToPathString(flags["perf_output_dir"].as<std::string>());
|
||||
if (params.perf_output_dir.empty()) {
|
||||
printf("No perf output directory specified. Trained perf metrics will not be saved.\n");
|
||||
}
|
||||
|
||||
params.use_mixed_precision = flags["use_mixed_precision"].as<bool>();
|
||||
params.allreduce_in_fp16 = flags["allreduce_in_fp16"].as<bool>() && params.use_mixed_precision;
|
||||
|
|
@ -260,6 +271,15 @@ float GetLossValue(const Tensor& loss_tensor) {
|
|||
return loss;
|
||||
}
|
||||
|
||||
// mapping that defines what is stored in mapped_dimensions
|
||||
// see GetTensorDimensionsFromInputs() in training_util.h and training_runner.cc for more details
|
||||
const std::map<std::string, std::pair<std::string, size_t>> input_to_dimension_mapping = {
|
||||
{"input_ids", {"SeqLen", 0}}, // int64[batch,seqlen] "seqlen" -> "SeqLen", 0
|
||||
};
|
||||
|
||||
// generic properties for storing perf metrics
|
||||
MapStringToString mapped_dimensions;
|
||||
|
||||
void setup_training_params(GPT2Parameters& params) {
|
||||
params.model_path = ToPathString(params.model_name) + ORT_TSTR(".onnx");
|
||||
params.model_with_loss_func_path = ToPathString(params.model_name) + ORT_TSTR("_with_cost.onnx");
|
||||
|
|
@ -316,6 +336,8 @@ void setup_training_params(GPT2Parameters& params) {
|
|||
{"attention_mask", "attention_mask"},
|
||||
{"labels", "labels"}};
|
||||
|
||||
params.model_type = "gpt2";
|
||||
|
||||
#ifdef USE_CUDA
|
||||
OrtDevice::DeviceId device_id = static_cast<OrtDevice::DeviceId>(params.mpi_context.local_rank);
|
||||
params.providers.emplace(kCudaExecutionProvider, CreateExecutionProviderFactory_CUDA(device_id));
|
||||
|
|
@ -408,7 +430,13 @@ static Status RunTraining(const GPT2Parameters& params, const Environment& env)
|
|||
max_num_files_preload);
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(runner->Run(training_data_loader.get(), test_data_loader.get()));
|
||||
if (!params.perf_output_dir.empty()) {
|
||||
// collecting GPT2 related params from training data
|
||||
auto training_data = training_data_loader->CurrentDataSet();
|
||||
ORT_RETURN_IF_ERROR(training_data->GetTensorDimensionsFromInputs(input_to_dimension_mapping, mapped_dimensions));
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(runner->Run(training_data_loader.get(), test_data_loader.get(), mapped_dimensions));
|
||||
|
||||
// only test and save trained model on device #0
|
||||
if (params.mpi_context.world_rank == 0) {
|
||||
|
|
|
|||
|
|
@ -923,7 +923,7 @@ Status TrainingRunner::SavePerfMetrics(const size_t number_of_batches, const siz
|
|||
(seq_len.empty() ? "" : "_" + seq_len) + "_" + optimizer;
|
||||
perf_metrics["DisplayName"] = display_name;
|
||||
|
||||
perf_metrics["Memory"] = peak_workingset_size;
|
||||
perf_metrics["Memory"] = peak_workingset_size >> 20; // mb
|
||||
perf_metrics["AvgCPU"] = average_cpu_usage;
|
||||
|
||||
//
|
||||
|
|
|
|||
60
orttraining/tools/ci_test/run_gpt2_perf_test.py
Normal file
60
orttraining/tools/ci_test/run_gpt2_perf_test.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
|
||||
|
||||
def parse_args():
    """Parse the command-line arguments of the GPT-2 perf test driver.

    All three options are required; argparse exits with a usage error when
    any of them is missing.

    Returns:
        argparse.Namespace with the attributes ``binary_dir``,
        ``training_data_root`` and ``model_root``.
    """
    parser = argparse.ArgumentParser(description="Runs GPT-2 performance tests.")
    parser.add_argument("--binary_dir", required=True,
                        help="Path to the ORT binary directory.")
    parser.add_argument("--training_data_root", required=True,
                        help="Path to the training data root directory.")
    parser.add_argument("--model_root", required=True,
                        help="Path to the model root directory.")
    return parser.parse_args()
|
||||
|
||||
# TODO - review to finalize params
|
||||
def main():
    """Run the GPT-2 training binary once per perf configuration.

    For every configuration (mixed precision on/off, sequence length, batch
    size) this builds the ``onnxruntime_training_gpt2`` command line from the
    parsed arguments and runs it with 200 training steps and a gradient
    accumulation of 1. Perf metrics are written to a ``results`` directory
    next to this script.

    Returns:
        0 when every run exits successfully.

    Raises:
        subprocess.CalledProcessError: if any training run exits with a
            non-zero return code; remaining configurations are not run.
    """
    args = parse_args()

    Config = namedtuple('Config', ['use_mixed_precision', 'max_seq_length', 'batch_size'])
    configs = [
        Config(True, 1024, 1),
        Config(False, 1024, 1)
    ]

    # run GPT-2 training
    for c in configs:
        print("######## testing name - " + ('fp16-' if c.use_mixed_precision else 'fp32-') + str(c.max_seq_length) + " ##############")
        cmds = [
            os.path.join(args.binary_dir, "onnxruntime_training_gpt2"),
            "--model_name", os.path.join(
                args.model_root, "megatron-gpt2_hidden-size-1024_num-layers-24_vocab-size-50257_num-attention-heads-16_max-position-embeddings-1024_optimized"),
            "--train_data_dir", os.path.join(
                args.training_data_root, "train"),
            "--test_data_dir", os.path.join(
                args.training_data_root, "test"),
            "--train_batch_size", str(c.batch_size),
            "--mode", "train",
            "--max_seq_length", str(c.max_seq_length),
            "--num_train_steps", "200",
            "--gradient_accumulation_steps", "1",
            "--perf_output_dir", os.path.join(SCRIPT_DIR, "results"),
        ]

        if c.use_mixed_precision:
            cmds.append("--use_mixed_precision")

        # check=True raises CalledProcessError on a non-zero exit code
        subprocess.run(cmds, check=True)

    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
@ -36,6 +36,19 @@ jobs:
|
|||
--model_root /build/bert_models
|
||||
displayName: 'Run bert performance tests'
|
||||
|
||||
- script: >
|
||||
docker run --gpus all --rm --name onnxruntime-gpu-perf
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src
|
||||
--volume $(Build.BinariesDirectory):/build
|
||||
--volume /bert_ort/gpt2_models:/build/gpt2_models:ro
|
||||
--volume /bert_data/gpt2_data:/build/gpt2_data:ro
|
||||
-e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6
|
||||
/usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_gpt2_perf_test.py
|
||||
--binary_dir /build/RelWithDebInfo
|
||||
--training_data_root /build/gpt2_data
|
||||
--model_root /build/gpt2_models
|
||||
displayName: 'Run gpt-2 performance tests'
|
||||
|
||||
# generate jdbc.properties
|
||||
- script: >
|
||||
mkdir -p $(Build.SourcesDirectory)/tools/perf_util/src/main/resources &&
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package com.msft.send_perf_metrics;
|
||||
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
|
||||
|
|
@ -13,11 +12,9 @@ import java.nio.file.SimpleFileVisitor;
|
|||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.Types;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
public class App {
|
||||
|
||||
|
|
@ -94,63 +91,57 @@ public class App {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static private void loadMetricsIntoMySQL(java.sql.Connection conn, String commit_id, String batch_id,
|
||||
JSONObject json_object) throws Exception {
|
||||
|
||||
try (java.sql.PreparedStatement st = conn.prepareStatement(
|
||||
"INSERT INTO perf_test_training_data (BatchId,CommitId,Model,ModelName,DisplayName,UseMixedPrecision,Optimizer,BatchSize,SeqLen,PredictionsPerSeq," +
|
||||
"NumOfBatches,WeightUpdateSteps,Round,GradAccSteps,AvgTimePerBatch,Throughput,StabilizedThroughput,TotalTime,AvgCPU,Memory,RunConfig,Time) " +
|
||||
"values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,Now())"
|
||||
+ " ON DUPLICATE KEY UPDATE AvgTimePerBatch=?,Throughput=?,StabilizedThroughput=?,TotalTime=?,AvgCPU=?,Memory=?")) {
|
||||
// field name -> json value
|
||||
Map<String, Object> field_mapping = new LinkedHashMap();
|
||||
Set<String> update_on_duplicate_fields =
|
||||
new LinkedHashSet<> (Arrays.asList("AvgTimePerBatch", "Throughput", "StabilizedThroughput", "TotalTime", "AvgCPU", "Memory"));
|
||||
|
||||
int i = 0;
|
||||
|
||||
// unique key section
|
||||
st.setString(++i, batch_id);
|
||||
st.setString(++i, commit_id.substring(0, 8));
|
||||
st.setString(++i, (String) json_object.get("Model"));
|
||||
st.setString(++i, (String) json_object.get("ModelName"));
|
||||
st.setString(++i, (String) json_object.get("DisplayName"));
|
||||
st.setBoolean(++i, (Boolean) json_object.get("UseMixedPrecision"));
|
||||
st.setString(++i, (String) json_object.get("Optimizer"));
|
||||
st.setInt(++i, (int)(long) json_object.get("BatchSize"));
|
||||
|
||||
// non-key section
|
||||
JSONObject properties = (JSONObject) json_object.get("DerivedProperties");
|
||||
if (properties != null) {
|
||||
if (properties.get("SeqLen") == null) // mysql allows null value in unique key column
|
||||
st.setNull(++i, Types.INTEGER);
|
||||
else
|
||||
st.setInt(++i, Integer.parseInt((String) properties.get("SeqLen")));
|
||||
|
||||
if (properties.get("PredictionsPerSeq") == null) // mysql allows null value in unique key column
|
||||
st.setNull(++i, Types.INTEGER);
|
||||
else
|
||||
st.setInt(++i, Integer.parseInt((String) properties.get("PredictionsPerSeq")));
|
||||
field_mapping.put("BatchId", batch_id);
|
||||
field_mapping.put("CommitId", commit_id.substring(0, 8));
|
||||
json_object.forEach((key, value) -> {
|
||||
if (key.equals("DerivedProperties")) {
|
||||
JSONObject properties = (JSONObject) json_object.get("DerivedProperties");
|
||||
properties.forEach((sub_key, sub_value) -> {
|
||||
field_mapping.put((String)sub_key, sub_value);
|
||||
});
|
||||
} else {
|
||||
st.setNull(++i, Types.INTEGER);
|
||||
st.setNull(++i, Types.INTEGER);
|
||||
field_mapping.put((String)key, value);
|
||||
}
|
||||
});
|
||||
|
||||
// building sql statement
|
||||
StringBuilder sb = new StringBuilder("INSERT INTO perf_test_training_data (");
|
||||
field_mapping.forEach((key, value) -> {
|
||||
sb.append(key).append(",");
|
||||
});
|
||||
sb.append("Time) values (");
|
||||
for(int i = 0; i < field_mapping.size(); i++) {
|
||||
sb.append("?,");
|
||||
}
|
||||
sb.append("Now()) ON DUPLICATE KEY UPDATE ");
|
||||
update_on_duplicate_fields.forEach((key) -> {
|
||||
if(field_mapping.get(key) != null) {
|
||||
sb.append(key).append("=?,");
|
||||
}
|
||||
});
|
||||
|
||||
try (java.sql.PreparedStatement st = conn.prepareStatement(sb.substring(0, sb.length() - 1))) {
|
||||
int i = 0; // param index
|
||||
for (Map.Entry<String, Object> entry : field_mapping.entrySet()) {
|
||||
setSqlParam(++i, st, entry.getValue());
|
||||
}
|
||||
|
||||
st.setInt(++i, (int)(long) json_object.get("NumOfBatches"));
|
||||
st.setInt(++i, (int)(long) json_object.get("WeightUpdateSteps"));
|
||||
st.setInt(++i, (int)(long) json_object.get("Round"));
|
||||
st.setInt(++i, (int)(long) json_object.get("GradAccSteps"));
|
||||
st.setFloat(++i, (float)(double) json_object.get("AvgTimePerBatch")); // ms
|
||||
st.setFloat(++i, (float)(double) json_object.get("Throughput")); // examples/sec
|
||||
st.setFloat(++i, (float)(double) json_object.get("StabilizedThroughput")); // examples/sec
|
||||
st.setFloat(++i, (float)(double) json_object.get("TotalTime")); // secs
|
||||
st.setInt(++i, (int)(long) json_object.get("AvgCPU"));
|
||||
st.setInt(++i, (int)((long) json_object.get("Memory") >> 20)); // mb
|
||||
st.setString(++i, (String) json_object.get("RunConfig"));
|
||||
|
||||
// update section
|
||||
st.setFloat(++i, (float)(double) json_object.get("AvgTimePerBatch")); // ms
|
||||
st.setFloat(++i, (float)(double) json_object.get("Throughput")); // examples/sec
|
||||
st.setFloat(++i, (float)(double) json_object.get("StabilizedThroughput")); // examples/sec
|
||||
st.setFloat(++i, (float)(double) json_object.get("TotalTime")); // secs
|
||||
st.setInt(++i, (int)((long) json_object.get("Memory") >> 20)); // mb
|
||||
st.setString(++i, (String) json_object.get("RunConfig"));
|
||||
for(String key : update_on_duplicate_fields) {
|
||||
Object value = field_mapping.get(key);
|
||||
if(value != null) {
|
||||
setSqlParam(++i, st, value);
|
||||
}
|
||||
}
|
||||
|
||||
st.executeUpdate();
|
||||
} catch (Exception e) {
|
||||
|
|
@ -160,4 +151,18 @@ public class App {
|
|||
|
||||
}
|
||||
|
||||
static void setSqlParam(int param_index, PreparedStatement st, Object value) throws Exception {
|
||||
if (value instanceof String) {
|
||||
st.setString(param_index, (String) value);
|
||||
} else if (value instanceof Long) {
|
||||
st.setInt(param_index, (int) (long) value);
|
||||
} else if (value instanceof Double) {
|
||||
st.setFloat(param_index, (float) (double) value);
|
||||
} else if (value instanceof Boolean) {
|
||||
st.setBoolean(param_index, (Boolean) value);
|
||||
} else {
|
||||
throw new Exception("Unsupported data type:" + value.getClass().getName());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue