From 5139db5e0c578157e080bfda48a88fd41f921a17 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Fri, 29 Nov 2024 18:12:14 +0100 Subject: [PATCH] debug: add temporary breakpoint() calls in XLMRobertaXLSelfAttention (do not merge) --- .../models/xlm_roberta_xl/modeling_xlm_roberta_xl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py index cb88cbeab..b95f0f190 100644 --- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py @@ -210,11 +210,14 @@ class XLMRobertaXLSelfAttention(nn.Module): value_layer = self.transpose_for_scores(self.value(hidden_states)) key_layer = torch.cat([past_key_value[0], key_layer], dim=2) value_layer = torch.cat([past_key_value[1], value_layer], dim=2) + breakpoint() else: key_layer = self.transpose_for_scores(self.key(hidden_states)) value_layer = self.transpose_for_scores(self.value(hidden_states)) + breakpoint() query_layer = self.transpose_for_scores(mixed_query_layer) + breakpoint() use_cache = past_key_value is not None if self.is_decoder: