From 2e72bbab2cd169903b1e77b439718c1bdc5d50b2 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Nov 2023 18:18:54 +0000 Subject: [PATCH] Incorrect setting for num_beams in translation and summarization examples (#27519) * Remove the torch main_process_first context manager from TF examples * Correctly set num_beams=1 in our examples, and add a guard in GenerationConfig.validate() * Update src/transformers/generation/configuration_utils.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- .../summarization/run_summarization_flax.py | 2 +- .../summarization/run_summarization.py | 2 +- .../pytorch/translation/run_translation.py | 2 +- .../tensorflow/multiple-choice/run_swag.py | 26 +++++++------- .../summarization/run_summarization.py | 36 +++++++++---------- .../tensorflow/translation/run_translation.py | 36 +++++++++---------- .../generation/configuration_utils.py | 4 +++ 7 files changed, 53 insertions(+), 55 deletions(-) diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index f47c11e4e..a7d6633f6 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -312,7 +312,7 @@ class DataTrainingArguments: default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."} ) num_beams: Optional[int] = field( - default=None, + default=1, metadata={ "help": ( "Number of beams to use for evaluation. 
This argument will be passed to `model.generate`, " diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index d7f8b9f1c..d7f543c24 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -249,7 +249,7 @@ class DataTrainingArguments: }, ) num_beams: Optional[int] = field( - default=None, + default=1, metadata={ "help": ( "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 92af72ccd..6edbe6a99 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -217,7 +217,7 @@ class DataTrainingArguments: }, ) num_beams: Optional[int] = field( - default=None, + default=1, metadata={ "help": ( "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py index e78becda8..db73e137b 100644 --- a/examples/tensorflow/multiple-choice/run_swag.py +++ b/examples/tensorflow/multiple-choice/run_swag.py @@ -415,13 +415,12 @@ def main(): if data_args.max_train_samples is not None: max_train_samples = min(len(train_dataset), data_args.max_train_samples) train_dataset = train_dataset.select(range(max_train_samples)) - with training_args.main_process_first(desc="train dataset map pre-processing"): - train_dataset = train_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) + train_dataset = train_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) if training_args.do_eval: if "validation" not in 
raw_datasets: @@ -430,13 +429,12 @@ def main(): if data_args.max_eval_samples is not None: max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) eval_dataset = eval_dataset.select(range(max_eval_samples)) - with training_args.main_process_first(desc="validation dataset map pre-processing"): - eval_dataset = eval_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) + eval_dataset = eval_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) if data_args.pad_to_max_length: data_collator = DefaultDataCollator(return_tensors="np") diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py index 3ca57b033..c60893399 100644 --- a/examples/tensorflow/summarization/run_summarization.py +++ b/examples/tensorflow/summarization/run_summarization.py @@ -238,7 +238,7 @@ class DataTrainingArguments: }, ) num_beams: Optional[int] = field( - default=None, + default=1, metadata={ "help": ( "Number of beams to use for evaluation. 
This argument will be passed to ``model.generate``, " @@ -488,15 +488,14 @@ def main(): if data_args.max_train_samples is not None: max_train_samples = min(len(train_dataset), data_args.max_train_samples) train_dataset = train_dataset.select(range(max_train_samples)) - with training_args.main_process_first(desc="train dataset map pre-processing"): - train_dataset = train_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on train dataset", - ) + train_dataset = train_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on train dataset", + ) else: train_dataset = None @@ -508,15 +507,14 @@ def main(): if data_args.max_eval_samples is not None: max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) eval_dataset = eval_dataset.select(range(max_eval_samples)) - with training_args.main_process_first(desc="validation dataset map pre-processing"): - eval_dataset = eval_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) + eval_dataset = eval_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", + ) else: eval_dataset = None # endregion diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index e7fc47b33..7dd926774 100644 --- a/examples/tensorflow/translation/run_translation.py +++ 
b/examples/tensorflow/translation/run_translation.py @@ -226,7 +226,7 @@ class DataTrainingArguments: }, ) num_beams: Optional[int] = field( - default=None, + default=1, metadata={ "help": ( "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " @@ -454,15 +454,14 @@ def main(): if data_args.max_train_samples is not None: max_train_samples = min(len(train_dataset), data_args.max_train_samples) train_dataset = train_dataset.select(range(max_train_samples)) - with training_args.main_process_first(desc="train dataset map pre-processing"): - train_dataset = train_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on train dataset", - ) + train_dataset = train_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on train dataset", + ) else: train_dataset = None @@ -474,15 +473,14 @@ def main(): if data_args.max_eval_samples is not None: max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) eval_dataset = eval_dataset.select(range(max_eval_samples)) - with training_args.main_process_first(desc="validation dataset map pre-processing"): - eval_dataset = eval_dataset.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) + eval_dataset = eval_dataset.map( + preprocess_function, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", + ) else: eval_dataset = None # endregion diff --git 
a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py index c53738f14..6b0b434ec 100644 --- a/src/transformers/generation/configuration_utils.py +++ b/src/transformers/generation/configuration_utils.py @@ -409,6 +409,12 @@ class GenerationConfig(PushToHubMixin): ) # 2. detect beam-only parameterization when not in beam mode + if self.num_beams is None: + # `warnings.warn` takes (message, category); a logging call has no warning-category argument. + import warnings + warnings.warn("`num_beams` is set to None - defaulting to 1.", UserWarning) + self.num_beams = 1 + if self.num_beams == 1: single_beam_wrong_parameter_msg = ( "`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "