[Bugfix] OPTDecoderLayer does not return attentions when gradient_checkpointing and training are enabled. (#23367)

Update modeling_opt.py
This commit is contained in:
Author: AinL, 2023-05-15 21:31:53 +09:00; committed by GitHub
parent 569a97adb2
commit 41d47db90f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -299,9 +299,9 @@ class OPTDecoderLayer(nn.Module):
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
layer_head_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args: