transformers/tests/optimization/test_optimization_tf.py

# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import is_tf_available
from transformers.testing_utils import require_tf


if is_tf_available():
    import tensorflow as tf
    from tensorflow.python.eager import context
    from tensorflow.python.framework import ops

    from transformers import GradientAccumulator, create_optimizer


@require_tf
class OptimizationFTest(unittest.TestCase):
    """Checks ``GradientAccumulator`` both in plain eager mode and under a two-replica ``MirroredStrategy``."""

    def assertListAlmostEqual(self, list1, list2, tol):
        """Assert that two sequences have the same length and agree element-wise within ``tol``.

        Args:
            list1: First sequence of numbers.
            list2: Second sequence of numbers.
            tol: Maximum allowed absolute difference per element (passed as ``delta``).
        """
        self.assertEqual(len(list1), len(list2))
        for left, right in zip(list1, list2):
            self.assertAlmostEqual(left, right, delta=tol)

    def testGradientAccumulator(self):
        # Feed three single-gradient batches; the assertions below expect their element-wise sum.
        accumulator = GradientAccumulator()
        for raw_gradient in ([1.0, 2.0], [-2.0, 1.0], [-1.0, 2.0]):
            accumulator([tf.constant(raw_gradient)])

        # Supplying two gradients where one was accumulated so far must be rejected,
        # and (per the step assertion below) must not count as a step.
        with self.assertRaises(ValueError):
            accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])])

        self.assertEqual(accumulator.step, 3)
        self.assertEqual(len(accumulator.gradients), 1)
        summed = accumulator.gradients[0].numpy().tolist()
        self.assertListAlmostEqual(summed, [-2.0, 5.0], tol=1e-2)

        # After a reset the step counter and the accumulated values are both back to zero.
        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        cleared = accumulator.gradients[0].numpy().tolist()
        self.assertListAlmostEqual(cleared, [0.0, 0.0], tol=1e-2)

    def testGradientAccumulatorDistributionStrategy(self):
        # Discard the current eager context and re-enable eager execution.
        # NOTE(review): presumably needed so the logical device configuration below
        # can still be applied after earlier tests initialized TensorFlow -- confirm.
        context._context = None
        ops.enable_eager_execution_internal()

        # With a single physical CPU, split it into two logical devices so that
        # the mirrored strategy below has two replicas to work with.
        cpus = tf.config.list_physical_devices("CPU")
        if len(cpus) == 1:
            two_logical_cpus = [tf.config.LogicalDeviceConfiguration() for _ in range(2)]
            tf.config.set_logical_device_configuration(cpus[0], two_logical_cpus)
        logical_cpus = tf.config.list_logical_devices(device_type="CPU")
        strategy = tf.distribute.MirroredStrategy(devices=logical_cpus[:2])

        with strategy.scope():
            accumulator = GradientAccumulator()
            variable = tf.Variable([4.0, 3.0])
            optimizer, _ = create_optimizer(5e-5, 10, 5)
            # Non-trainable variable used to hand a different gradient to each replica.
            gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)

        def accumulate_on_replica(gradient):
            # Runs once per replica: accumulate that replica's gradient.
            accumulator([gradient])

        def apply_on_replica():
            # Runs once per replica: apply the accumulated gradients to `variable`.
            grads_and_vars = list(zip(accumulator.gradients, [variable]))
            optimizer.apply_gradients(grads_and_vars)

        @tf.function
        def accumulate(grad1, grad2):
            with strategy.scope():
                # Write one gradient into each replica's copy of the placeholder,
                # then let every replica accumulate its own copy.
                per_replica_slots = strategy.experimental_local_results(gradient_placeholder)
                per_replica_slots[0].assign(grad1)
                per_replica_slots[1].assign(grad2)
                strategy.run(accumulate_on_replica, args=(gradient_placeholder,))

        @tf.function
        def apply_grad():
            with strategy.scope():
                strategy.run(apply_on_replica)

        def _check_local_values(grad1, grad2):
            # Compare the accumulated gradient held by each replica with its expected value.
            per_replica_values = strategy.experimental_local_results(accumulator._gradients[0])
            for replica_index, expected in enumerate((grad1, grad2)):
                self.assertListAlmostEqual(per_replica_values[replica_index].value(), expected, tol=1e-2)

        accumulate([1.0, 2.0], [-1.0, 1.0])
        accumulate([3.0, -1.0], [-1.0, -1.0])
        accumulate([-2.0, 2.0], [3.0, -2.0])
        self.assertEqual(accumulator.step, 3)
        # Replica 0 saw [1, 2] + [3, -1] + [-2, 2]; replica 1 saw [-1, 1] + [-1, -1] + [3, -2].
        _check_local_values([2.0, 3.0], [1.0, -2.0])

        apply_grad()
        # The variable is expected to stay within 1e-2 of its initial value after one update.
        self.assertListAlmostEqual(variable.value(), [4.0, 3.0], tol=1e-2)

        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        _check_local_values([0.0, 0.0], [0.0, 0.0])
Copyright (#8970) * Add copyright everywhere missing * Style 2020-12-07 23:36:34 +00:00			`# Copyright 2020 The HuggingFace Team. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`import unittest`

			`from transformers import is_tf_available`
Move tests/utils.py -> transformers/testing_utils.py (#5350) 2020-07-01 14:31:17 +00:00			`from transformers.testing_utils import require_tf`
Remove pytest dependency. (#2093) 2019-12-07 12:46:14 +00:00
Sort imports with isort. This is the result of: $ isort --recursive examples templates transformers utils hubconf.py setup.py 2019-12-21 14:57:32 +00:00
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`if is_tf_available():`
			`import tensorflow as tf`
			`from tensorflow.python.eager import context`
			`from tensorflow.python.framework import ops`
Update repo to isort v5 (#6686) * Run new isort * More changes * Update CI, CONTRIBUTING and benchmarks 2020-08-24 15:03:01 +00:00
			`from transformers import GradientAccumulator, create_optimizer`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00
Remove pytest dependency. (#2093) 2019-12-07 12:46:14 +00:00
			`@require_tf`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`class OptimizationFTest(unittest.TestCase):`
			`def assertListAlmostEqual(self, list1, list2, tol):`
			`self.assertEqual(len(list1), len(list2))`
			`for a, b in zip(list1, list2):`
			`self.assertAlmostEqual(a, b, delta=tol)`
Reformat source code with black. This is the result of: $ black --line-length 119 examples templates transformers utils hubconf.py setup.py There's a lot of fairly long lines in the project. As a consequence, I'm picking the longest widely accepted line length, 119 characters. This is also Thomas' preference, because it allows for explicit variable names, to make the code easier to understand. 2019-12-21 14:46:46 +00:00
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`def testGradientAccumulator(self):`
			`accumulator = GradientAccumulator()`
			`accumulator([tf.constant([1.0, 2.0])])`
			`accumulator([tf.constant([-2.0, 1.0])])`
			`accumulator([tf.constant([-1.0, 2.0])])`
			`with self.assertRaises(ValueError):`
			`accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])])`
			`self.assertEqual(accumulator.step, 3)`
			`self.assertEqual(len(accumulator.gradients), 1)`
			`self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [-2.0, 5.0], tol=1e-2)`
			`accumulator.reset()`
			`self.assertEqual(accumulator.step, 0)`
			`self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [0.0, 0.0], tol=1e-2)`

			`def testGradientAccumulatorDistributionStrategy(self):`
			`context._context = None`
			`ops.enable_eager_execution_internal()`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`physical_devices = tf.config.list_physical_devices("CPU")`
			`if len(physical_devices) == 1:`
			`tf.config.set_logical_device_configuration(`
			`physical_devices[0], [tf.config.LogicalDeviceConfiguration(), tf.config.LogicalDeviceConfiguration()]`
			`)`
			`devices = tf.config.list_logical_devices(device_type="CPU")`
			`strategy = tf.distribute.MirroredStrategy(devices=devices[:2])`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00
			`with strategy.scope():`
			`accumulator = GradientAccumulator()`
			`variable = tf.Variable([4.0, 3.0])`
Tensorflow improvements (#4530) * Better None gradients handling * Apply Style * Apply Style * Create a loss class per task to compute its respective loss * Add loss classes to the ALBERT TF models * Add loss classes to the BERT TF models * Add question answering and multiple choice to TF Camembert * Remove prints * Add multiple choice model to TF DistilBERT + loss computation * Add question answering model to TF Electra + loss computation * Add token classification, question answering and multiple choice models to TF Flaubert * Add multiple choice model to TF Roberta + loss computation * Add multiple choice model to TF XLM + loss computation * Add multiple choice and question answering models to TF XLM-Roberta * Add multiple choice model to TF XLNet + loss computation * Remove unused parameters * Add task loss classes * Reorder TF imports + add new model classes * Add new model classes * Bugfix in TF T5 model * Bugfix for TF T5 tests * Bugfix in TF T5 model * Fix TF T5 model tests * Fix T5 tests + some renaming * Fix inheritance issue in the AutoX tests * Add tests for TF Flaubert and TF XLM Roberta * Add tests for TF Flaubert and TF XLM Roberta * Remove unused piece of code in the TF trainer * bugfix and remove unused code * Bugfix for TF 2.2 * Apply Style * Divide TFSequenceClassificationAndMultipleChoiceLoss into their two respective name * Apply style * Mirror the PT Trainer in the TF one: fp16, optimizers and tb_writer as class parameter and better dataset handling * Fix TF optimizations tests and apply style * Remove useless parameter * Bugfix and apply style * Fix TF Trainer prediction * Now the TF models return the loss such as their PyTorch couterparts * Apply Style * Ignore some tests output * Take into account the SQuAD cls_index, p_mask and is_impossible parameters for the QuestionAnswering task models. 
* Fix names for SQuAD data * Apply Style * Fix conflicts with 2.11 release * Fix conflicts with 2.11 * Fix wrongname * Add better documentation on the new create_optimizer function * Fix isort * logging_dir: use same default as PyTorch Co-authored-by: Julien Chaumond <chaumond@gmail.com> 2020-06-04 23:45:53 +00:00			`optimizer, _ = create_optimizer(5e-5, 10, 5)`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)`

			`def accumulate_on_replica(gradient):`
			`accumulator([gradient])`

			`def apply_on_replica():`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`optimizer.apply_gradients(list(zip(accumulator.gradients, [variable])))`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00
			`@tf.function`
			`def accumulate(grad1, grad2):`
			`with strategy.scope():`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`local_variables = strategy.experimental_local_results(gradient_placeholder)`
			`local_variables[0].assign(grad1)`
			`local_variables[1].assign(grad2)`
Use stable functions (#9369) 2021-01-05 08:58:26 +00:00			`strategy.run(accumulate_on_replica, args=(gradient_placeholder,))`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00
			`@tf.function`
			`def apply_grad():`
			`with strategy.scope():`
Use stable functions (#9369) 2021-01-05 08:58:26 +00:00			`strategy.run(apply_on_replica)`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`def _check_local_values(grad1, grad2):`
			`values = strategy.experimental_local_results(accumulator._gradients[0])`
			`self.assertListAlmostEqual(values[0].value(), grad1, tol=1e-2)`
			`self.assertListAlmostEqual(values[1].value(), grad2, tol=1e-2)`

Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`accumulate([1.0, 2.0], [-1.0, 1.0])`
			`accumulate([3.0, -1.0], [-1.0, -1.0])`
			`accumulate([-2.0, 2.0], [3.0, -2.0])`
			`self.assertEqual(accumulator.step, 3)`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`_check_local_values([2.0, 3.0], [1.0, -2.0])`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`apply_grad()`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`self.assertListAlmostEqual(variable.value(), [4.0, 3.0], tol=1e-2)`
Add few tests on the TF optimization file with some info in the documentation. Complete the README. 2019-12-05 11:56:43 +00:00			`accumulator.reset()`
			`self.assertEqual(accumulator.step, 0)`
TF version of the trainer (#4017) * First commit to add a TF version of the trainer. * Make the TF trainer closer to what looks the PT trainer * Refactoring common code between the PT and TF trainer into an util file. * Some bugfix + better similarity with the PT trainer * Add missing class in transformers init * Bugfix over prediction + use classification report instead of simple metrics * Fix name error * Fix optimization tests + style * Apply style * Several bugfix for multi-gpu training * Apply style * Apply style * Add glue example for the TF trainer * Several bugix + address the reviews * Fix on the TF training args file * Add a debug mode * Bugfix in utils_ner.py when segment_ids is None * Apply style * Apply style * Add TPU strategy * Fix selection strategy 2020-05-06 16:56:52 +00:00			`_check_local_values([0.0, 0.0], [0.0, 0.0])`