From f2acf5fe341c3799a4e7e396fcda4761c303c32f Mon Sep 17 00:00:00 2001 From: "[[ -z $EMAIL ]] && read -e -p \"Enter your email (for git configuration): \" EMAIL" Date: Tue, 21 Jan 2025 09:25:18 -0500 Subject: [PATCH] Expound x2 --- src/transformers/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 43287f8b7..bf713b58a 100644 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -1479,7 +1479,7 @@ class Trainer: num_train_tokens = None # Since the actual `max_steps` depends on if we have a dataloader length, number of epochs, or max_steps, - # we keep it defined here first as the "base case" if we don't have a dataloader length later + # we keep it defined here first as the "base case" if we don't have a dataloader length later (Case 1) max_steps = args.max_steps # If max_steps is negative, we use the number of epochs to determine the number of total steps later epoch_based = max_steps < 0 @@ -1497,8 +1497,8 @@ class Trainer: len_dataloader = len(train_dataloader) num_update_steps_per_epoch = max(len_dataloader // args.gradient_accumulation_steps, 1) num_examples = self.num_examples(train_dataloader) + # Case 3: we have a length but are using epochs, so we can extrapolate the number of steps if epoch_based: - # Since we have a dataloader length and are utilizing epochs, we can extrapolate the number of steps max_steps = math.ceil(args.num_train_epochs * num_update_steps_per_epoch) else: # Otherwise, we calculate the number of epochs from max_steps & update steps