From f82a2a5e8e6827343322a4a9831924c5bb9bd2b2 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Wed, 8 Jul 2020 12:11:09 +0200
Subject: [PATCH] [Benchmark] Add benchmarks for TF Training (#5594)

* tf_train

* adapt timing for tpu

* fix timing

* fix timing

* fix timing

* fix timing

* update notebook

* add tests
---
 notebooks/05-benchmark.ipynb               |  6 +-
 src/transformers/benchmark/benchmark.py    |  6 +-
 src/transformers/benchmark/benchmark_tf.py | 75 +++++++++++++++++++---
 tests/test_benchmark_tf.py                 | 31 +++++++++
 4 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/notebooks/05-benchmark.ipynb b/notebooks/05-benchmark.ipynb
index 67e6bff0f..fcecfcd11 100644
--- a/notebooks/05-benchmark.ipynb
+++ b/notebooks/05-benchmark.ipynb
@@ -312,8 +312,8 @@
     ":-- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n",
     "**Speed - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✘ | ✔ |\n",
     "**Memory - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✘ | ✘ |\n",
-    "**Speed - Train** | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ |\n",
-    "**Memory - Train** | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ |\n",
+    "**Speed - Train** | ✔ | ✘ | ✔ | ✘ | ✘ | ✘ | ✔ |\n",
+    "**Memory - Train** | ✔ | ✘ | ✔ | ✘ | ✘ | ✘ | ✘ |\n",
     "\n",
     "* *eager execution* means that the function is run in the eager execution environment of TensorFlow 2, see [here](https://www.tensorflow.org/guide/eager).\n",
     "\n",
@@ -321,7 +321,7 @@
     "\n",
     "* *FP16* stands for TensorFlow's mixed-precision package and is analogous to PyTorch's FP16 feature, see [here](https://www.tensorflow.org/guide/mixed_precision).\n",
     "\n",
-    "***Note***: In ~1,2 weeks it will also be possible to benchmark training in TensorFlow.\n",
+    "***Note***: Benchmarking training in TensorFlow is not included in v3.0.2, but is available on master.\n",
     "\n",
     "\n",
     "This notebook will show the user how to use `PyTorchBenchmark` and `TensorFlowBenchmark` for two different scenarios:\n",
diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py
index 37d2e26aa..eb7c32d81 100644
--- a/src/transformers/benchmark/benchmark.py
+++ b/src/transformers/benchmark/benchmark.py
@@ -157,7 +157,7 @@ class PyTorchBenchmark(Benchmark):
         else:
             train_model = model
 
-        model.eval()
+        model.train()
         model.to(self.args.device)
 
         # encoder-decoder has vocab size saved differently
@@ -175,12 +175,12 @@ class PyTorchBenchmark(Benchmark):
         def compute_loss_and_backprob_encoder():
             loss = train_model(input_ids, labels=input_ids)[0]
             loss.backward()
-            train_model.zero_grad()
+            return loss
 
         def compute_loss_and_backprob_encoder_decoder():
             loss = train_model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0]
             loss.backward()
-            train_model.zero_grad()
+            return loss
 
         _train = (
             compute_loss_and_backprob_encoder_decoder
diff --git a/src/transformers/benchmark/benchmark_tf.py b/src/transformers/benchmark/benchmark_tf.py
index 9d1a339f5..cb67a6a89 100644
--- a/src/transformers/benchmark/benchmark_tf.py
+++ b/src/transformers/benchmark/benchmark_tf.py
@@ -24,7 +24,13 @@ import timeit
 from functools import wraps
 from typing import Callable, Optional
 
-from transformers import TF_MODEL_MAPPING, PretrainedConfig, is_py3nvml_available, is_tf_available
+from transformers import (
+    TF_MODEL_MAPPING,
+    TF_MODEL_WITH_LM_HEAD_MAPPING,
+    PretrainedConfig,
+    is_py3nvml_available,
+    is_tf_available,
+)
 
 from .benchmark_utils import (
     Benchmark,
@@ -92,10 +98,11 @@ class TensorFlowBenchmark(Benchmark):
         _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
         return self._measure_speed(_inference)
 
-    def _train_speed(self, model_name, batch_size, sequence_length):
-        raise NotImplementedError(
-            "Training is currently not really implemented." "Wait for TFTrainer to support CLM and MLM."
-        )
+    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
+        strategy = self.args.strategy
+        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."
+        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
+        return self._measure_speed(_train)
 
     def _inference_memory(
         self, model_name: str, batch_size: int, sequence_length: int
@@ -108,10 +115,16 @@
         _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
         return self._measure_memory(_inference)
 
-    def _train_memory(self, model_name, batch_size, sequence_length):
-        raise NotImplementedError(
-            "Training is currently not really implemented. Wait for TFTrainer to support CLM and MLM."
-        )
+    def _train_memory(
+        self, model_name: str, batch_size: int, sequence_length: int
+    ) -> [Memory, Optional[MemorySummary]]:
+        if self.args.is_gpu:
+            tf.config.experimental.set_memory_growth(self.args.gpu_list[self.args.device_idx], True)
+        strategy = self.args.strategy
+        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."
+
+        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
+        return self._measure_memory(_train)
 
     def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
         config = self.config_dict[model_name]
@@ -149,6 +162,50 @@
 
         return _inference
 
+    def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
+        config = self.config_dict[model_name]
+
+        assert (
+            self.args.eager_mode is False
+        ), "Training cannot be done in eager mode. Please make sure that `args.eager_mode = False`."
+
+        if self.args.fp16:
+            raise NotImplementedError("Mixed precision is currently not supported.")
+
+        has_model_class_in_config = hasattr(config, "architectures") and len(config.architectures) > 0
+        if not self.args.only_pretrain_model and has_model_class_in_config:
+            try:
+                model_class = "TF" + config.architectures[0]  # prepend 'TF' for tensorflow model
+                transformers_module = __import__("transformers", fromlist=[model_class])
+                model_cls = getattr(transformers_module, model_class)
+                model = model_cls(config)
+            except ImportError:
+                raise ImportError(
+                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
+ ) + else: + model = TF_MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config) + + # encoder-decoder has vocab size saved differently + vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size + input_ids = random_input_ids(batch_size, sequence_length, vocab_size) + + @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla) + def encoder_decoder_train(): + loss = model(input_ids, decoder_input_ids=input_ids, labels=input_ids, training=True)[0] + gradients = tf.gradients(loss, model.trainable_variables) + return gradients + + @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla) + def encoder_train(): + loss = model(input_ids, labels=input_ids, training=True)[0] + gradients = tf.gradients(loss, model.trainable_variables) + return gradients + + _train = encoder_decoder_train if config.is_encoder_decoder else encoder_train + + return _train + def _measure_speed(self, func) -> float: with self.args.strategy.scope(): try: diff --git a/tests/test_benchmark_tf.py b/tests/test_benchmark_tf.py index 929d5624c..30ead4c95 100644 --- a/tests/test_benchmark_tf.py +++ b/tests/test_benchmark_tf.py @@ -100,6 +100,37 @@ class TFBenchmarkTest(unittest.TestCase): self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) + def test_train_no_configs(self): + MODEL_ID = "sshleifer/tiny-gpt2" + benchmark_args = TensorFlowBenchmarkArguments( + models=[MODEL_ID], + training=True, + no_inference=True, + sequence_lengths=[8], + batch_sizes=[1], + no_multi_process=True, + ) + benchmark = TensorFlowBenchmark(benchmark_args) + results = benchmark.run() + self.check_results_dict_not_empty(results.time_train_result) + self.check_results_dict_not_empty(results.memory_train_result) + + def test_train_with_configs(self): + MODEL_ID = "sshleifer/tiny-gpt2" + config = AutoConfig.from_pretrained(MODEL_ID) + benchmark_args = TensorFlowBenchmarkArguments( + models=[MODEL_ID], + training=True, + no_inference=True, + sequence_lengths=[8], + batch_sizes=[1], + no_multi_process=True, + ) + benchmark = TensorFlowBenchmark(benchmark_args, [config]) + results = benchmark.run() + self.check_results_dict_not_empty(results.time_train_result) + self.check_results_dict_not_empty(results.memory_train_result) + def test_inference_encoder_decoder_with_configs(self): MODEL_ID = "patrickvonplaten/t5-tiny-random" config = AutoConfig.from_pretrained(MODEL_ID)
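
For reference, a minimal usage sketch of the new TF training benchmarks, assembled from `test_train_no_configs` above (the model ID, batch size, and sequence length are just the tiny values the test uses). Note that the benchmarked training step is a forward pass plus gradient computation; no optimizer update is applied, matching the bare `tf.gradients` / `loss.backward()` calls in the patch.

    from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments

    # Same settings as test_train_no_configs: benchmark training speed and
    # memory for a tiny GPT-2 checkpoint, skipping the inference benchmarks.
    benchmark_args = TensorFlowBenchmarkArguments(
        models=["sshleifer/tiny-gpt2"],
        training=True,          # run the new training benchmarks
        no_inference=True,      # skip the inference benchmarks
        sequence_lengths=[8],
        batch_sizes=[1],
        no_multi_process=True,
    )

    benchmark = TensorFlowBenchmark(benchmark_args)
    results = benchmark.run()

    # Training results are reported next to the existing inference fields.
    print(results.time_train_result)
    print(results.memory_train_result)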