Mirror of https://github.com/saymrwulf/transformers.git (synced 2026-05-14 20:58:08 +00:00)
Fix comment (#10886)

parent 9856c9213d
commit 86c6f8a8b1

1 changed file with 1 addition and 0 deletions
@@ -904,6 +904,7 @@ class T5Stack(T5PreTrainedModel):
         if past_key_values is None:
             past_key_values = [None] * len(self.block)
 
         # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+        # ourselves in which case we just need to make it broadcastable to all heads.
         extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, inputs_embeds.device)