diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 01d29bd02..12a020aa2 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1157,6 +1157,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu self.built = True else: self.built = True + # Set the serving spec quickly to ensure that Keras doesn't use the specific dummy input shapes as the spec + # Setting it in build() allows users to override the shape when loading a non-pretrained model from config + self._set_save_spec(self._prune_signature(self.input_signature)) self(self.dummy_inputs, training=False) def __init__(self, config, *inputs, **kwargs): @@ -1171,8 +1174,6 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu self.config = config self.name_or_path = config.name_or_path self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None - # Set the serving spec quickly to ensure that Keras doesn't use the specific dummy input shapes as the spec - self._set_save_spec(self._prune_signature(self.input_signature)) def get_config(self): return self.config.to_dict() diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index b94c005eb..95c22e204 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -1216,12 +1216,11 @@ class TFBlipForQuestionAnswering(TFBlipPreTrainedModel): def call( self, input_ids: tf.Tensor, - pixel_values: tf.Tensor, + pixel_values: tf.Tensor | None = None, decoder_input_ids: tf.Tensor | None = None, decoder_attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, - foutput_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 9d92ef5c4..bc76a30ce 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -618,7 +618,7 @@ class TFOPTDecoder(tf.keras.layers.Layer): attention_mask = tf.ones(inputs_embeds.shape[:2], dtype=tf.bool) else: tf.debugging.assert_equal( - attention_mask.shape[1], + tf.shape(attention_mask)[1], past_key_values_length + input_shape[1], message=( f"The provided attention mask has length {attention_mask.shape[1]}, but its length should be " diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 59caabffa..86ec432e4 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -195,30 +195,19 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(tf.keras.layers.Layer): emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, tf.shape(emb)[1])), emb[padding_idx + 1 :, :]], axis=0) return emb - def build(self, input_shape: tf.TensorShape): - """ - Build shared token embedding layer Shared weights logic adapted from - https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24 - """ - self.embeddings = self.add_weight( - name="weights", # name also used in PT - shape=tf.shape(self.embedding_weights), - trainable=False, - ) - self.embeddings.assign(self.embedding_weights) - super().build(input_shape) - def call(self, input_ids: tf.Tensor, past_key_values_length: int = 0) -> tf.Tensor: bsz, seq_len = shape_list(input_ids) # Create the position ids from the input token ids. Any padded tokens remain padded. position_ids = self.create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length) - # expand embeddings if needed - max_pos = self.padding_idx + 1 + seq_len - if max_pos > shape_list(self.embeddings)[0]: - self.embedding_weights = self._get_embedding(max_pos + self.offset, self.embedding_dim, self.padding_idx) - self.embeddings.assign(self.embedding_weights) - return tf.reshape(tf.gather(self.embeddings, tf.reshape(position_ids, (-1,)), axis=0), (bsz, seq_len, -1)) + # Matt: The PyTorch code does a lot of work to cache the embeddings, setting the cached values as a + # model attribute in the forward pass. This is extremely forbidden in TF, which wants forward calls to be + # idempotent. TF doesn't need that caching anyway, since it can just store constants during compilation, + # so we just remove all of that code. + embeddings = self._get_embedding( + self.padding_idx + 1 + seq_len + self.offset + past_key_values_length, self.embedding_dim, self.padding_idx + ) + return tf.reshape(tf.gather(embeddings, tf.reshape(position_ids, (-1,)), axis=0), (bsz, seq_len, -1)) @staticmethod def create_position_ids_from_input_ids( @@ -562,6 +551,7 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel): config_class = Speech2TextConfig base_model_prefix = "model" main_input_name = "input_features" + _keys_to_ignore_on_load_unexpected = [r"encoder.embed_positions.weights"] def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor): """ diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index b8b9d8555..dee1d71bc 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -588,35 +588,19 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): klen = mlen + qlen # Compute decoder attention mask - - # ::: PyTorch masking code for reference ::: - # if self.same_length: - # all_ones = word_emb.new_ones((qlen, klen), dtype=torch.uint8) - # mask_len = klen - self.mem_len - # if mask_len > 0: - # mask_shift_len = qlen - mask_len - # else: - # mask_shift_len = qlen - # dec_attn_mask = (torch.triu(all_ones, 1+mlen) - # + torch.tril(all_ones, -mask_shift_len))[:, :, None] # -1 - # else: - # dec_attn_mask = torch.triu( - # word_emb.new_ones((qlen, klen), dtype=torch.uint8), diagonal=1+mlen)[:,:,None] - - # TensorFlow version - dec_attn_mask = 1 - tf.linalg.band_part( - tf.ones([qlen, klen], dtype=tf.int32), -1, mlen - ) # (q, q): diagonal with 1's + all_ones = tf.ones([qlen, klen], dtype=tf.int32) + upper_mask = 1 - tf.linalg.band_part(tf.ones([qlen, klen], dtype=tf.int32), -1, mlen) if self.same_length: mask_len = klen - self.mem_len - if mask_len > 0: - mask_shift_len = qlen - mask_len - else: - mask_shift_len = qlen - if mask_shift_len >= 1: - dec_attn_mask += 1 - tf.linalg.band_part(tf.ones([qlen, klen], dtype=tf.int32), mask_shift_len - 1, -1) - else: - dec_attn_mask += tf.linalg.band_part(tf.ones([qlen, klen], dtype=tf.int32), -1, -mask_shift_len) + mask_shift_len = qlen - tf.nn.relu(mask_len) # Lazy clamping of negatives to zero + + # Use an indicator variable instead of a conditional to keep the compiler happy + lower_mask = tf.linalg.band_part(all_ones, -1, 0) - ( + tf.linalg.band_part(all_ones, mask_shift_len - 1, 0) * tf.cast(mask_shift_len != 0, tf.int32) + ) + dec_attn_mask = upper_mask + lower_mask + else: + dec_attn_mask = upper_mask hids = [] attentions = [] if output_attentions else None diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 6cc9db021..639eb912f 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -463,19 +463,14 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): ) -> tf.Tensor: # create causal mask # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - combined_attention_mask: tf.Tensor | None = None - if input_shape[-1] > 1: - combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length) - - if attention_mask is not None: - expand_attention_mask = _expand_mask(attention_mask, tgt_len=input_shape[-1]) - combined_attention_mask = ( - expand_attention_mask - if combined_attention_mask is None - else expand_attention_mask + combined_attention_mask - ) - - return combined_attention_mask + combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length) + combined_attention_mask = tf.cond( + input_shape[-1] > 1, lambda: combined_attention_mask, lambda: tf.ones_like(combined_attention_mask) + ) + if attention_mask is None: + return combined_attention_mask + expand_attention_mask = _expand_mask(attention_mask, tgt_len=input_shape[-1]) + return expand_attention_mask + combined_attention_mask def embed_positions(self, position_ids: np.ndarray | tf.Tensor | None = None) -> tf.Tensor: position_ids += self.offset @@ -512,10 +507,10 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): if input_ids is not None and inputs_embeds is not None: raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") elif input_ids is not None: - input_shape = shape_list(input_ids) + input_shape = tf.shape(input_ids) input_ids = tf.reshape(input_ids, (-1, input_shape[-1])) elif inputs_embeds is not None: - input_shape = shape_list(inputs_embeds)[:-1] + input_shape = tf.shape(inputs_embeds)[:-1] else: raise ValueError("You have to specify either input_ids or inputs_embeds") diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index c7c69aa18..53057814e 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -2676,7 +2676,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) self.model = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="model") - self.model._set_save_spec(self._prune_signature(self.input_signature)) self.use_cache = config.use_cache # final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency. self.bias_layer = BiasLayer( diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py index dfd953f8c..60b182672 100644 --- a/tests/models/bart/test_modeling_tf_bart.py +++ b/tests/models/bart/test_modeling_tf_bart.py @@ -22,7 +22,7 @@ import unittest import numpy as np from transformers import BartConfig, BartTokenizer, is_tf_available -from transformers.testing_utils import require_tf, slow, tooslow +from transformers.testing_utils import require_tf, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -225,10 +225,6 @@ class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - # TODO (Joao): fix me @unittest.skip("Onnx compliancy broke with TF 2.10") def test_onnx_compliancy(self): diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py index ac4649cfc..7553bb908 100644 --- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py @@ -19,7 +19,7 @@ from __future__ import annotations import unittest from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available -from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -207,10 +207,6 @@ class TFBlenderbotModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_tokenizers @require_tf diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py index 31e4c94c6..2118ec683 100644 --- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py @@ -19,7 +19,7 @@ from __future__ import annotations import unittest from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available -from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -209,10 +209,6 @@ class TFBlenderbotSmallModelTest(TFModelTesterMixin, PipelineTesterMixin, unitte config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_tokenizers @require_tf diff --git a/tests/models/convnext/test_modeling_tf_convnext.py b/tests/models/convnext/test_modeling_tf_convnext.py index 8d049cf9f..a508e038d 100644 --- a/tests/models/convnext/test_modeling_tf_convnext.py +++ b/tests/models/convnext/test_modeling_tf_convnext.py @@ -156,6 +156,7 @@ class TFConvNextModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0, reason="TF does not support backprop for grouped convolutions on CPU.", ) + @slow def test_keras_fit(self): super().test_keras_fit() diff --git a/tests/models/cvt/test_modeling_tf_cvt.py b/tests/models/cvt/test_modeling_tf_cvt.py index 78d95931b..2a42732e6 100644 --- a/tests/models/cvt/test_modeling_tf_cvt.py +++ b/tests/models/cvt/test_modeling_tf_cvt.py @@ -185,6 +185,7 @@ class TFCvtModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase) not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0, reason="TF does not support backprop for grouped convolutions on CPU.", ) + @slow def test_keras_fit(self): super().test_keras_fit() diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index 320b5ede5..b3f2eec8f 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -347,6 +347,7 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, PipelineTesterMixin, unittes check_hidden_states_output(inputs_dict, config, model_class) # Overriding this method since the base method won't be compatible with Data2VecVision. + @slow def test_keras_fit(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/funnel/test_modeling_tf_funnel.py b/tests/models/funnel/test_modeling_tf_funnel.py index 051da46fa..b8327c86a 100644 --- a/tests/models/funnel/test_modeling_tf_funnel.py +++ b/tests/models/funnel/test_modeling_tf_funnel.py @@ -19,7 +19,7 @@ from __future__ import annotations import unittest from transformers import FunnelConfig, is_tf_available -from transformers.testing_utils import require_tf, tooslow +from transformers.testing_utils import require_tf from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask @@ -386,10 +386,6 @@ class TFFunnelModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_tf class TFFunnelBaseModelTest(TFModelTesterMixin, unittest.TestCase): @@ -417,7 +413,3 @@ class TFFunnelBaseModelTest(TFModelTesterMixin, unittest.TestCase): def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - - @tooslow - def test_saved_model_creation(self): - pass diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index a80ef606e..6cd6ae7aa 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -601,6 +601,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test pass @require_tensorflow_probability + @slow def test_keras_fit(self): super().test_keras_fit() @@ -692,11 +693,6 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test def test_saved_model_creation(self): pass - @unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.") - @slow - def test_saved_model_creation_extended(self): - pass - @unittest.skip(reason="`saved_model` doesn't work with nested outputs so no preparation happens.") @slow def test_prepare_serving_output(self): diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py index 480d4520d..a06a29fd3 100644 --- a/tests/models/led/test_modeling_tf_led.py +++ b/tests/models/led/test_modeling_tf_led.py @@ -19,7 +19,7 @@ from __future__ import annotations import unittest from transformers import LEDConfig, is_tf_available -from transformers.testing_utils import require_tf, slow, tooslow +from transformers.testing_utils import require_tf, slow from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor @@ -292,11 +292,7 @@ class TFLEDModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase) self.assertEqual(model.config.output_hidden_states, True) check_encoder_attentions_output(outputs) - def test_xla_mode(self): - # TODO JP: Make LED XLA compliant - pass - - @tooslow + @unittest.skip("LED keeps using potentially symbolic tensors in conditionals and breaks tracing.") def test_saved_model_creation(self): pass diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py index dcdd68b18..1cba14cdb 100644 --- a/tests/models/longformer/test_modeling_tf_longformer.py +++ b/tests/models/longformer/test_modeling_tf_longformer.py @@ -19,7 +19,7 @@ from __future__ import annotations import unittest from transformers import is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask @@ -356,14 +356,10 @@ class TFLongformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - @tooslow + @unittest.skip("Longformer keeps using potentially symbolic tensors in conditionals and breaks tracing.") def test_saved_model_creation(self): pass - def test_xla_mode(self): - # TODO JP: Make Longformer XLA compliant - pass - @require_tf @require_sentencepiece diff --git a/tests/models/lxmert/test_modeling_tf_lxmert.py b/tests/models/lxmert/test_modeling_tf_lxmert.py index a99495d00..52f64d614 100644 --- a/tests/models/lxmert/test_modeling_tf_lxmert.py +++ b/tests/models/lxmert/test_modeling_tf_lxmert.py @@ -15,14 +15,13 @@ from __future__ import annotations -import os import tempfile import unittest import numpy as np from transformers import LxmertConfig, is_tf_available -from transformers.testing_utils import require_tf, slow, tooslow +from transformers.testing_utils import require_tf, slow from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask @@ -532,73 +531,6 @@ class TFLxmertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa self.assert_outputs_same(after_outputs, outputs) - @tooslow - def test_saved_model_creation(self): - pass - - @slow - def test_saved_model_creation_extended(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = True - - if hasattr(config, "use_cache"): - config.use_cache = True - - encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) - encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - num_out = len(model(class_inputs_dict)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - model = tf.keras.models.load_model(saved_model_dir) - outputs = model(class_inputs_dict) - language_hidden_states = outputs["language_hidden_states"] - vision_hidden_states = outputs["vision_hidden_states"] - language_attentions = outputs["language_attentions"] - vision_attentions = outputs["vision_attentions"] - cross_encoder_attentions = outputs["cross_encoder_attentions"] - - self.assertEqual(len(outputs), num_out) - - self.assertEqual(len(language_hidden_states), self.model_tester.num_hidden_layers["language"] + 1) - self.assertEqual(len(vision_hidden_states), self.model_tester.num_hidden_layers["vision"] + 1) - - seq_length = self.model_tester.seq_length - num_visual_features = self.model_tester.num_visual_features - - self.assertListEqual( - list(language_hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - self.assertListEqual( - list(vision_hidden_states[0].shape[-2:]), - [num_visual_features, self.model_tester.hidden_size], - ) - - self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"]) - self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"]) - self.assertEqual(len(cross_encoder_attentions), self.model_tester.num_hidden_layers["cross_encoder"]) - - attentions = [language_attentions, vision_attentions, cross_encoder_attentions] - attention_shapes = [ - [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], - [ - self.model_tester.num_attention_heads, - self.model_tester.num_visual_features, - self.model_tester.num_visual_features, - ], - [self.model_tester.num_attention_heads, encoder_key_length, self.model_tester.num_visual_features], - ] - - for attention, attention_shape in zip(attentions, attention_shapes): - self.assertListEqual(list(attention[0].shape[-3:]), attention_shape) - @require_tf class TFLxmertModelIntegrationTest(unittest.TestCase): diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py index 1a87f4e98..50cff4219 100644 --- a/tests/models/marian/test_modeling_tf_marian.py +++ b/tests/models/marian/test_modeling_tf_marian.py @@ -20,7 +20,7 @@ import unittest import warnings from transformers import AutoTokenizer, MarianConfig, MarianTokenizer, TranslationPipeline, is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -208,10 +208,6 @@ class TFMarianModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_tf class AbstractMarianIntegrationTest(unittest.TestCase): diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py index 753f961d1..9ca4a7d69 100644 --- a/tests/models/mbart/test_modeling_tf_mbart.py +++ b/tests/models/mbart/test_modeling_tf_mbart.py @@ -18,7 +18,7 @@ from __future__ import annotations import unittest from transformers import AutoTokenizer, MBartConfig, is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -195,10 +195,6 @@ class TFMBartModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_sentencepiece @require_tokenizers diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py index 7e67c56a4..607ba5b88 100644 --- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py +++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py @@ -20,7 +20,7 @@ import unittest from transformers import MobileBertConfig, is_tf_available from transformers.models.auto import get_values -from transformers.testing_utils import require_tf, slow, tooslow +from transformers.testing_utils import require_tf, slow from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask @@ -311,15 +311,6 @@ class TFMobileBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_token_classification(*config_and_inputs) - @slow - def test_keras_fit(self): - # Override as it is a slow test on this model - super().test_keras_fit() - - @tooslow - def test_saved_model_creation(self): - pass - @slow def test_model_from_pretrained(self): # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: diff --git a/tests/models/opt/test_modeling_tf_opt.py b/tests/models/opt/test_modeling_tf_opt.py index fb7c842c9..1847ad50a 100644 --- a/tests/models/opt/test_modeling_tf_opt.py +++ b/tests/models/opt/test_modeling_tf_opt.py @@ -20,7 +20,7 @@ import unittest import numpy as np from transformers import OPTConfig, is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, slow from ...test_configuration_common import ConfigTester from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor @@ -219,10 +219,6 @@ class TFOPTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase) models_equal = False self.assertTrue(models_equal) - @tooslow - def test_saved_model_creation(self): - pass - def _long_tensor(tok_lst): return tf.constant(tok_lst, dtype=tf.int32) diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py index dcd0479e2..33e908ab2 100644 --- a/tests/models/pegasus/test_modeling_tf_pegasus.py +++ b/tests/models/pegasus/test_modeling_tf_pegasus.py @@ -18,7 +18,7 @@ from __future__ import annotations import unittest from transformers import AutoTokenizer, PegasusConfig, is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -206,10 +206,6 @@ class TFPegasusModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @require_sentencepiece @require_tokenizers diff --git a/tests/models/rag/test_modeling_tf_rag.py b/tests/models/rag/test_modeling_tf_rag.py index b4720f7c7..0041b0e6c 100644 --- a/tests/models/rag/test_modeling_tf_rag.py +++ b/tests/models/rag/test_modeling_tf_rag.py @@ -490,6 +490,7 @@ class TFRagTestMixin: inputs_dict = self.config_and_inputs self.check_model_without_retriever(**inputs_dict) + @slow def test_model_generate_from_context_input_ids(self): inputs_dict = self.config_and_inputs self.check_model_generate_from_context_input_ids(**inputs_dict) @@ -498,6 +499,7 @@ class TFRagTestMixin: inputs_dict = self.config_and_inputs self.check_model_with_encoder_outputs(**inputs_dict) + @slow def test_model_generate(self): inputs_dict = self.config_and_inputs self.check_model_generate(**inputs_dict) diff --git a/tests/models/regnet/test_modeling_tf_regnet.py b/tests/models/regnet/test_modeling_tf_regnet.py index cee3995d2..4536458c5 100644 --- a/tests/models/regnet/test_modeling_tf_regnet.py +++ b/tests/models/regnet/test_modeling_tf_regnet.py @@ -148,6 +148,7 @@ class TFRegNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0, reason="TF does not support backprop for grouped convolutions on CPU.", ) + @slow def test_keras_fit(self): super().test_keras_fit() diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py index d3317b207..b381a2e89 100644 --- a/tests/models/segformer/test_modeling_tf_segformer.py +++ b/tests/models/segformer/test_modeling_tf_segformer.py @@ -347,6 +347,7 @@ class TFSegformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Tes not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0, reason="TF does not support backprop for grouped convolutions on CPU.", ) + @slow def test_keras_fit(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index d270a1cf4..16ad704fd 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -722,6 +722,10 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest self.assertTrue(models_equal) + def test_pt_tf_model_equivalence(self, allow_missing_keys=True): + # Allow missing keys since TF doesn't cache the sinusoidal embeddings in an attribute + super().test_pt_tf_model_equivalence(allow_missing_keys=allow_missing_keys) + @require_torch @require_torchaudio diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py index b283b4478..c874d5c5c 100644 --- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py @@ -558,6 +558,10 @@ class TFSpeech2TextModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.T ] self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names) + def test_pt_tf_model_equivalence(self, allow_missing_keys=True): + # Allow missing keys since TF doesn't cache the sinusoidal embeddings in an attribute + super().test_pt_tf_model_equivalence(allow_missing_keys=allow_missing_keys) + @require_tf @require_sentencepiece diff --git a/tests/models/swin/test_modeling_tf_swin.py b/tests/models/swin/test_modeling_tf_swin.py index a898d22fb..de271fb38 100644 --- a/tests/models/swin/test_modeling_tf_swin.py +++ b/tests/models/swin/test_modeling_tf_swin.py @@ -23,7 +23,7 @@ import unittest import numpy as np from transformers import SwinConfig -from transformers.testing_utils import require_tf, require_vision, slow, to_2tuple, tooslow +from transformers.testing_utils import require_tf, require_vision, slow, to_2tuple from transformers.utils import cached_property, is_tf_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -232,10 +232,6 @@ class TFSwinModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase def test_inputs_embeds(self): pass - @tooslow - def test_saved_model_creation(self): - pass - def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index a2caa664f..46b0f4c59 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -18,7 +18,7 @@ from __future__ import annotations import unittest from transformers import T5Config, is_tf_available -from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow +from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from transformers.utils import cached_property from ...test_configuration_common import ConfigTester @@ -300,10 +300,6 @@ class TFT5ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase): self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs) - @tooslow - def test_saved_model_creation(self): - pass - @slow def test_model_from_pretrained(self): model = TFT5Model.from_pretrained("t5-small") diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 3c3ca75ff..840f4d42b 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -1415,6 +1415,7 @@ class TFModelTesterMixin: def check_keras_fit_results(self, val_loss1, val_loss2, atol=1e-2, rtol=1e-3): self.assertTrue(np.allclose(val_loss1, val_loss2, atol=atol, rtol=rtol)) + @slow def test_keras_fit(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: