From 41d47db90fbe9937c0941f2f9cdb2ddd83e49a2e Mon Sep 17 00:00:00 2001
From: AinL
Date: Mon, 15 May 2023 21:31:53 +0900
Subject: [PATCH] [Bugfix] `OPTDecoderLayer` does not return attentions when
 `gradient_checkpointing` and `training` are enabled. (#23367)

Under gradient checkpointing, `OPTDecoder.forward` invokes each decoder
layer through `torch.utils.checkpoint.checkpoint`, passing the arguments
positionally with a trailing `None` meant for `past_key_value`. With the
old parameter order that `None` landed in `output_attentions` instead, so
attention weights were silently dropped during training. Moving
`past_key_value` ahead of `output_attentions` and `use_cache` makes the
positional, checkpointed call line up with the signature; the regular call
path passes these arguments by keyword and is unaffected.
---
 src/transformers/models/opt/modeling_opt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py
index 6086b620e..94269ffbf 100644
--- a/src/transformers/models/opt/modeling_opt.py
+++ b/src/transformers/models/opt/modeling_opt.py
@@ -299,9 +299,9 @@ class OPTDecoderLayer(nn.Module):
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         layer_head_mask: Optional[torch.Tensor] = None,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-        past_key_value: Optional[Tuple[torch.Tensor]] = None,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         """
         Args:
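
For reference, a minimal sketch of why the parameter order matters. The
`forward_old`/`forward_new` functions below are hypothetical stand-ins for
the layer's forward signature before and after this patch (not the library
code); only the parameter order differs, and each body just reports what it
received as `output_attentions`. The call at the bottom mirrors the shape
of the checkpointed invocation in `OPTDecoder.forward`, which appends
`output_attentions` and a trailing positional `None` intended for
`past_key_value`.

    from typing import Optional, Tuple

    import torch

    def forward_old(
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
    ) -> Optional[bool]:
        # Pre-patch order: output_attentions sits in the 4th position.
        return output_attentions

    def forward_new(
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
    ) -> Optional[bool]:
        # Post-patch order: past_key_value sits in the 4th position.
        return output_attentions

    hidden_states = torch.zeros(1, 1, 4)
    output_attentions = True

    # The checkpointed path supplies a positional None meant for
    # past_key_value, then output_attentions, then another None:
    inputs = (hidden_states, None, None, None)

    print(forward_old(*inputs, output_attentions, None))  # None: attentions dropped
    print(forward_new(*inputs, output_attentions, None))  # True: attentions returned

With the old order the 4th positional `None` overwrote `output_attentions`,
which is exactly the symptom in the subject line; the reorder lets the same
positional call bind `None` to `past_key_value` as intended.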