frontend test to use random seed (#3209)

frontend test to use random seed
This commit is contained in:
liqunfu 2020-04-08 10:03:07 -07:00 committed by GitHub
parent b35468289a
commit 1ddfe1249b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 24 deletions

View file

@ -38,7 +38,6 @@ def bert_model_description():
num_classes=vocab_size)
next_sentence_labels_desc = IODescription('next_sentence_labels', ['batch', ], torch.int64, num_classes=2)
loss_desc = IODescription('loss', [], torch.float32)
# probability_desc = IODescription('probability', ['batch', 10], torch.float32)
return ModelDescription([input_ids_desc, segment_ids_desc, input_mask_desc, masked_lm_labels_desc,
next_sentence_labels_desc], [loss_desc])
@ -72,7 +71,8 @@ def runBertTrainingTest(gradient_accumulation_steps, use_mixed_precision, allred
gradient_accumulation_steps=gradient_accumulation_steps,
world_rank=0, world_size=1,
use_mixed_precision=use_mixed_precision,
allreduce_post_accumulation=allreduce_post_accumulation)
allreduce_post_accumulation=allreduce_post_accumulation,
seed=1)
loss_scaler = LossScaler(model.loss_scale_input_name, True)
@ -134,44 +134,42 @@ class TestOrtTrainer(unittest.TestCase):
def testBertTrainingBasic(self):
torch.manual_seed(1)
expected_losses = [
11.050175666809082, 11.16925048828125, 11.017821311950684, 11.052311897277832,
10.89547061920166, 10.996326446533203, 11.079578399658203, 10.966521263122559]
expected_eval_loss = [11.05634880065918]
11.032349586486816, 11.165414810180664, 11.018413543701172, 11.050261497497559,
10.855697631835938, 10.947554588317871, 11.083847999572754, 10.97836685180664]
expected_eval_loss = [10.972074508666992]
actual_losses, actual_eval_loss = runBertTrainingTest(
gradient_accumulation_steps=1, use_mixed_precision=False, allreduce_post_accumulation=False)
# to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
# print('actual_losses ', actual_losses)
# print('eval_loss', actual_eval_loss)
print('actual_losses ', actual_losses)
print('eval_loss', actual_eval_loss)
# import pdb; pdb.set_trace()
rtol = 1e-01
assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
assert_allclose(expected_losses, actual_losses, err_msg="loss mismatch")
assert_allclose(expected_eval_loss, actual_eval_loss, err_msg="evaluation loss mismatch")
def testBertTrainingGradientAccumulation(self):
torch.manual_seed(1)
# These commented-out expected results are for running the test individually (pytest with -k).
# expected_losses = [
# 11.050175666809082, 11.16925048828125, 11.017815589904785, 11.0523099899292,
# 10.895469665527344, 10.996331214904785, 11.079588890075684, 10.966512680053711]
# expected_eval_loss = [11.05636978149414]
# 11.071269035339355, 10.996841430664062, 11.06226921081543, 10.981647491455078,
# 11.032355308532715, 11.04256534576416, 10.976116180419922, 11.065701484680176]
# expected_eval_loss = [10.991236686706543]
expected_losses = [
11.041119575500488, 11.142148971557617, 11.022183418273926, 11.047553062438965,
10.866510391235352, 10.95550537109375, 11.083690643310547, 11.002318382263184]
expected_eval_loss = [10.977485656738281]
11.026690483093262, 11.117761611938477, 11.010371208190918, 11.068782806396484,
10.894888877868652, 10.923206329345703, 11.06037425994873, 11.008777618408203]
expected_eval_loss = [11.011880874633789]
actual_losses, actual_eval_loss = runBertTrainingTest(
gradient_accumulation_steps=4, use_mixed_precision=False, allreduce_post_accumulation=False)
# to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
# print('actual_losses ', actual_losses)
# print('eval_loss', actual_eval_loss)
print('actual_losses ', actual_losses)
print('eval_loss', actual_eval_loss)
# import pdb; pdb.set_trace()
rtol = 1e-01
assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
assert_allclose(expected_losses, actual_losses, err_msg="loss mismatch")
assert_allclose(expected_eval_loss, actual_eval_loss, err_msg="evaluation loss mismatch")
def testBertTrainingMixedPrecision(self):
# skip the test due to the lack of mixed precision capacity of ort CI.

View file

@ -384,7 +384,9 @@ def create_ort_training_session_with_optimizer(model, device, training_optimizer
map_optimizer_attributes, world_rank=-1, world_size=1,
gradient_accumulation_steps=1, bind_parameters=False,
use_mixed_precision=False, allreduce_post_accumulation=False,
partition_optimizer=False, enable_grad_norm_clip=True,
partition_optimizer=False,
enable_grad_norm_clip=True,
seed=None,
frozen_weights=[]):
output_name = model.graph.output[0].name
ort_parameters = ort.TrainingParameters()
@ -396,6 +398,8 @@ def create_ort_training_session_with_optimizer(model, device, training_optimizer
ort_parameters.use_mixed_precision = use_mixed_precision
ort_parameters.allreduce_post_accumulation = allreduce_post_accumulation
ort_parameters.partition_optimizer = partition_optimizer
if seed is not None:
ort_parameters.seed = seed
ort_parameters.enable_grad_norm_clip = enable_grad_norm_clip
output_types = {}
@ -516,6 +520,7 @@ class ORTTrainer():
learning_rate_description, device, gradient_accumulation_steps=1, postprocess_model=None,
world_rank=0, world_size=1, use_mixed_precision=False, allreduce_post_accumulation=False,
global_step=0, get_lr_this_step=None, loss_scaler=None, partition_optimizer=False,
seed=None,
enable_grad_norm_clip=True, frozen_weights=[]):
super(ORTTrainer, self).__init__()
"""
@ -546,6 +551,7 @@ class ORTTrainer():
use_mixed_precision:
allreduce_post_accumulation:
partition_optimizer: Whether to partition the optimizer state. (default=False)
seed: Static random seed for the backend, settable from user code; only applied when not None. (default=None)
"""
self.is_train = True
@ -593,6 +599,7 @@ class ORTTrainer():
self.enable_grad_norm_clip_ = enable_grad_norm_clip
self.frozen_weights_ = frozen_weights
self.loss_scale_input_name = ''
self.seed_ = seed
self._init_session()
@ -608,7 +615,9 @@ class ORTTrainer():
self.world_rank, self.world_size,
self.gradient_accumulation_steps, bind_parameters=False,
use_mixed_precision=self.use_mixed_precision, allreduce_post_accumulation=self.allreduce_post_accumulation_,
partition_optimizer=self.partition_optimizer_, enable_grad_norm_clip=self.enable_grad_norm_clip_,
partition_optimizer=self.partition_optimizer_,
enable_grad_norm_clip=self.enable_grad_norm_clip_,
seed=self.seed_,
frozen_weights=self.frozen_weights_)
self.loss_scale_input_name = self.session.loss_scale_input_name

View file

@ -187,7 +187,8 @@ void addObjectMethodsForTraining(py::module& m) {
.def_readwrite("world_size", &TrainingParameters::world_size)
.def_readwrite("gradient_accumulation_steps", &TrainingParameters::gradient_accumulation_steps)
.def_readwrite("partition_optimizer", &TrainingParameters::partition_optimizer)
.def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip);
.def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip)
.def_readwrite("seed", &TrainingParameters::seed);
py::class_<TrainingConfigurationResult> config_result(m, "TrainingConfigurationResult", "pbdoc(Configuration result for training.)pbdoc");
config_result.def(py::init())