Mirror of https://github.com/saymrwulf/transformers.git, synced 2026-05-14 20:58:08 +00:00
[Bugfix] OPTDecoderLayer does not return attentions when gradient_checkpointing and training are enabled. (#23367)
Update modeling_opt.py
This commit is contained in:
parent 569a97adb2
commit 41d47db90f
1 changed file with 1 addition and 1 deletion
@@ -299,9 +299,9 @@ class OPTDecoderLayer(nn.Module):
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         layer_head_mask: Optional[torch.Tensor] = None,
-        past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         """
         Args:
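Why a one-line reorder fixes the bug: under gradient checkpointing, the decoder at that time invoked each layer through a closure that forwarded arguments positionally, roughly module(*inputs, output_attentions, None). With past_key_value declared fourth in the signature, the positional output_attentions flag landed in the past_key_value slot and None landed in output_attentions, so the layer never returned attention weights. Moving past_key_value to the end restores the intended mapping without touching the call site. Below is a minimal stand-alone sketch of the mismatch, not the real layer; forward_before and forward_after are hypothetical stand-ins for the two signatures, and the call pattern is an assumption based on the era's checkpointing helper in modeling_opt.py:

    # Hypothetical stand-in for the old signature: past_key_value comes fourth.
    def forward_before(hidden_states, attention_mask=None, layer_head_mask=None,
                       past_key_value=None, output_attentions=False, use_cache=False):
        # The real layer only returns attention weights when this flag is truthy.
        return output_attentions

    # Hypothetical stand-in for the fixed signature: past_key_value moved last.
    def forward_after(hidden_states, attention_mask=None, layer_head_mask=None,
                      output_attentions=False, use_cache=False, past_key_value=None):
        return output_attentions

    # Assumed call pattern of the checkpointed closure: three tensors, then
    # a positional output_attentions flag and a None meant for past_key_value.
    inputs = ("hidden_states", "attention_mask", "layer_head_mask")
    print(forward_before(*inputs, True, None))  # None: True swallowed by past_key_value
    print(forward_after(*inputs, True, None))   # True: output_attentions gets the flag

Keyword arguments at the call site would have avoided the mismatch entirely, but reordering the signature keeps every positional caller working, which is why the fix amounts to a single moved line.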