mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
frontend test to use random seed (#3209)
frontend test to use random seed
This commit is contained in:
parent
b35468289a
commit
1ddfe1249b
3 changed files with 32 additions and 24 deletions
|
|
@ -38,7 +38,6 @@ def bert_model_description():
|
|||
num_classes=vocab_size)
|
||||
next_sentence_labels_desc = IODescription('next_sentence_labels', ['batch', ], torch.int64, num_classes=2)
|
||||
loss_desc = IODescription('loss', [], torch.float32)
|
||||
# probability_desc = IODescription('probability', ['batch', 10], torch.float32)
|
||||
|
||||
return ModelDescription([input_ids_desc, segment_ids_desc, input_mask_desc, masked_lm_labels_desc,
|
||||
next_sentence_labels_desc], [loss_desc])
|
||||
|
|
@ -72,7 +71,8 @@ def runBertTrainingTest(gradient_accumulation_steps, use_mixed_precision, allred
|
|||
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||
world_rank=0, world_size=1,
|
||||
use_mixed_precision=use_mixed_precision,
|
||||
allreduce_post_accumulation=allreduce_post_accumulation)
|
||||
allreduce_post_accumulation=allreduce_post_accumulation,
|
||||
seed=1)
|
||||
|
||||
loss_scaler = LossScaler(model.loss_scale_input_name, True)
|
||||
|
||||
|
|
@ -134,44 +134,42 @@ class TestOrtTrainer(unittest.TestCase):
|
|||
def testBertTrainingBasic(self):
|
||||
torch.manual_seed(1)
|
||||
expected_losses = [
|
||||
11.050175666809082, 11.16925048828125, 11.017821311950684, 11.052311897277832,
|
||||
10.89547061920166, 10.996326446533203, 11.079578399658203, 10.966521263122559]
|
||||
expected_eval_loss = [11.05634880065918]
|
||||
11.032349586486816, 11.165414810180664, 11.018413543701172, 11.050261497497559,
|
||||
10.855697631835938, 10.947554588317871, 11.083847999572754, 10.97836685180664]
|
||||
expected_eval_loss = [10.972074508666992]
|
||||
actual_losses, actual_eval_loss = runBertTrainingTest(
|
||||
gradient_accumulation_steps=1, use_mixed_precision=False, allreduce_post_accumulation=False)
|
||||
|
||||
# to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
|
||||
# print('actual_losses ', actual_losses)
|
||||
# print('eval_loss', actual_eval_loss)
|
||||
print('actual_losses ', actual_losses)
|
||||
print('eval_loss', actual_eval_loss)
|
||||
# import pdb; pdb.set_trace()
|
||||
|
||||
rtol = 1e-01
|
||||
assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
|
||||
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
|
||||
assert_allclose(expected_losses, actual_losses, err_msg="loss mismatch")
|
||||
assert_allclose(expected_eval_loss, actual_eval_loss, err_msg="evaluation loss mismatch")
|
||||
|
||||
def testBertTrainingGradientAccumulation(self):
|
||||
torch.manual_seed(1)
|
||||
# these commented-out expected results are for running the test individually (pytest with -k).
|
||||
# expected_losses = [
|
||||
# 11.050175666809082, 11.16925048828125, 11.017815589904785, 11.0523099899292,
|
||||
# 10.895469665527344, 10.996331214904785, 11.079588890075684, 10.966512680053711]
|
||||
# expected_eval_loss = [11.05636978149414]
|
||||
# 11.071269035339355, 10.996841430664062, 11.06226921081543, 10.981647491455078,
|
||||
# 11.032355308532715, 11.04256534576416, 10.976116180419922, 11.065701484680176]
|
||||
# expected_eval_loss = [10.991236686706543]
|
||||
expected_losses = [
|
||||
11.041119575500488, 11.142148971557617, 11.022183418273926, 11.047553062438965,
|
||||
10.866510391235352, 10.95550537109375, 11.083690643310547, 11.002318382263184]
|
||||
expected_eval_loss = [10.977485656738281]
|
||||
11.026690483093262, 11.117761611938477, 11.010371208190918, 11.068782806396484,
|
||||
10.894888877868652, 10.923206329345703, 11.06037425994873, 11.008777618408203]
|
||||
expected_eval_loss = [11.011880874633789]
|
||||
|
||||
actual_losses, actual_eval_loss = runBertTrainingTest(
|
||||
gradient_accumulation_steps=4, use_mixed_precision=False, allreduce_post_accumulation=False)
|
||||
|
||||
# to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
|
||||
# print('actual_losses ', actual_losses)
|
||||
# print('eval_loss', actual_eval_loss)
|
||||
print('actual_losses ', actual_losses)
|
||||
print('eval_loss', actual_eval_loss)
|
||||
# import pdb; pdb.set_trace()
|
||||
|
||||
rtol = 1e-01
|
||||
assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
|
||||
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
|
||||
assert_allclose(expected_losses, actual_losses, err_msg="loss mismatch")
|
||||
assert_allclose(expected_eval_loss, actual_eval_loss, err_msg="evaluation loss mismatch")
|
||||
|
||||
def testBertTrainingMixedPrecision(self):
|
||||
# skip the test due to the lack of mixed precision capacity of ort CI.
|
||||
|
|
|
|||
|
|
@ -384,7 +384,9 @@ def create_ort_training_session_with_optimizer(model, device, training_optimizer
|
|||
map_optimizer_attributes, world_rank=-1, world_size=1,
|
||||
gradient_accumulation_steps=1, bind_parameters=False,
|
||||
use_mixed_precision=False, allreduce_post_accumulation=False,
|
||||
partition_optimizer=False, enable_grad_norm_clip=True,
|
||||
partition_optimizer=False,
|
||||
enable_grad_norm_clip=True,
|
||||
seed=None,
|
||||
frozen_weights=[]):
|
||||
output_name = model.graph.output[0].name
|
||||
ort_parameters = ort.TrainingParameters()
|
||||
|
|
@ -396,6 +398,8 @@ def create_ort_training_session_with_optimizer(model, device, training_optimizer
|
|||
ort_parameters.use_mixed_precision = use_mixed_precision
|
||||
ort_parameters.allreduce_post_accumulation = allreduce_post_accumulation
|
||||
ort_parameters.partition_optimizer = partition_optimizer
|
||||
if seed is not None:
|
||||
ort_parameters.seed = seed
|
||||
ort_parameters.enable_grad_norm_clip = enable_grad_norm_clip
|
||||
|
||||
output_types = {}
|
||||
|
|
@ -516,6 +520,7 @@ class ORTTrainer():
|
|||
learning_rate_description, device, gradient_accumulation_steps=1, postprocess_model=None,
|
||||
world_rank=0, world_size=1, use_mixed_precision=False, allreduce_post_accumulation=False,
|
||||
global_step=0, get_lr_this_step=None, loss_scaler=None, partition_optimizer=False,
|
||||
seed=None,
|
||||
enable_grad_norm_clip=True, frozen_weights=[]):
|
||||
super(ORTTrainer, self).__init__()
|
||||
"""
|
||||
|
|
@ -546,6 +551,7 @@ class ORTTrainer():
|
|||
use_mixed_precision:
|
||||
allreduce_post_accumulation:
|
||||
partition_optimizer: Whether to partition the optimizer state. (default=False)
|
||||
seed: Static random seed for the backend, settable from user code; only applied when not None. (default=None)
|
||||
"""
|
||||
self.is_train = True
|
||||
|
||||
|
|
@ -593,6 +599,7 @@ class ORTTrainer():
|
|||
self.enable_grad_norm_clip_ = enable_grad_norm_clip
|
||||
self.frozen_weights_ = frozen_weights
|
||||
self.loss_scale_input_name = ''
|
||||
self.seed_ = seed
|
||||
|
||||
self._init_session()
|
||||
|
||||
|
|
@ -608,7 +615,9 @@ class ORTTrainer():
|
|||
self.world_rank, self.world_size,
|
||||
self.gradient_accumulation_steps, bind_parameters=False,
|
||||
use_mixed_precision=self.use_mixed_precision, allreduce_post_accumulation=self.allreduce_post_accumulation_,
|
||||
partition_optimizer=self.partition_optimizer_, enable_grad_norm_clip=self.enable_grad_norm_clip_,
|
||||
partition_optimizer=self.partition_optimizer_,
|
||||
enable_grad_norm_clip=self.enable_grad_norm_clip_,
|
||||
seed=self.seed_,
|
||||
frozen_weights=self.frozen_weights_)
|
||||
|
||||
self.loss_scale_input_name = self.session.loss_scale_input_name
|
||||
|
|
|
|||
|
|
@ -187,7 +187,8 @@ void addObjectMethodsForTraining(py::module& m) {
|
|||
.def_readwrite("world_size", &TrainingParameters::world_size)
|
||||
.def_readwrite("gradient_accumulation_steps", &TrainingParameters::gradient_accumulation_steps)
|
||||
.def_readwrite("partition_optimizer", &TrainingParameters::partition_optimizer)
|
||||
.def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip);
|
||||
.def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip)
|
||||
.def_readwrite("seed", &TrainingParameters::seed);
|
||||
|
||||
py::class_<TrainingConfigurationResult> config_result(m, "TrainingConfigurationResult", "pbdoc(Configuration result for training.)pbdoc");
|
||||
config_result.def(py::init())
|
||||
|
|
|
|||
Loading…
Reference in a new issue