This commit is contained in:
ydshieh 2024-11-29 18:12:14 +01:00
parent 640897d330
commit 5139db5e0c

View file

@@ -210,11 +210,14 @@ class XLMRobertaXLSelfAttention(nn.Module):
value_layer = self.transpose_for_scores(self.value(hidden_states))
key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
breakpoint()
else:
key_layer = self.transpose_for_scores(self.key(hidden_states))
value_layer = self.transpose_for_scores(self.value(hidden_states))
breakpoint()
query_layer = self.transpose_for_scores(mixed_query_layer)
breakpoint()
use_cache = past_key_value is not None
if self.is_decoder: