From 5990743fddb4780b15b8af2ed7ab55145ab40455 Mon Sep 17 00:00:00 2001
From: Ali Hassani <68103095+alihassanijr@users.noreply.github.com>
Date: Tue, 21 Mar 2023 14:21:34 -0700
Subject: [PATCH] Correct NATTEN function signatures and force new version
 (#22298)

---
 setup.py                                        | 2 +-
 src/transformers/dependency_versions_table.py   | 2 +-
 src/transformers/models/dinat/modeling_dinat.py | 2 +-
 src/transformers/models/nat/modeling_nat.py     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 70197c209..bddf49e46 100644
--- a/setup.py
+++ b/setup.py
@@ -129,7 +129,7 @@ _deps = [
     "keras-nlp>=0.3.1",
     "librosa",
     "nltk",
-    "natten>=0.14.5",
+    "natten>=0.14.6",
     "numpy>=1.17",
     "onnxconverter-common",
     "onnxruntime-tools>=1.4.2",
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index aa23c1ad0..c0cacd759 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -35,7 +35,7 @@ deps = {
     "keras-nlp": "keras-nlp>=0.3.1",
     "librosa": "librosa",
     "nltk": "nltk",
-    "natten": "natten>=0.14.5",
+    "natten": "natten>=0.14.6",
     "numpy": "numpy>=1.17",
     "onnxconverter-common": "onnxconverter-common",
     "onnxruntime-tools": "onnxruntime-tools>=1.4.2",
diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py
index efeb68846..2cacab8ac 100644
--- a/src/transformers/models/dinat/modeling_dinat.py
+++ b/src/transformers/models/dinat/modeling_dinat.py
@@ -356,7 +356,7 @@ class NeighborhoodAttention(nn.Module):
         # seem a bit unusual, but is taken from the original Transformer paper.
         attention_probs = self.dropout(attention_probs)
 
-        context_layer = natten2dav(attention_probs, value_layer, self.dilation)
+        context_layer = natten2dav(attention_probs, value_layer, self.kernel_size, self.dilation)
         context_layer = context_layer.permute(0, 2, 3, 1, 4).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
         context_layer = context_layer.view(new_context_layer_shape)
diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/nat/modeling_nat.py
index 3a93b81e4..dfe801d19 100644
--- a/src/transformers/models/nat/modeling_nat.py
+++ b/src/transformers/models/nat/modeling_nat.py
@@ -348,7 +348,7 @@ class NeighborhoodAttention(nn.Module):
         # seem a bit unusual, but is taken from the original Transformer paper.
         attention_probs = self.dropout(attention_probs)
 
-        context_layer = natten2dav(attention_probs, value_layer, 1)
+        context_layer = natten2dav(attention_probs, value_layer, self.kernel_size, 1)
         context_layer = context_layer.permute(0, 2, 3, 1, 4).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
         context_layer = context_layer.view(new_context_layer_shape)
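
For context, a minimal standalone sketch of the corrected natten2dav call under natten>=0.14.6. The import path matches the one used by modeling_nat.py; the tensor sizes below are illustrative assumptions, not values taken from the patch.

import torch
from natten.functional import natten2dav

# Illustrative sizes only: a 7x7 neighborhood with no dilation.
batch, heads, height, width, head_dim = 2, 4, 14, 14, 32
kernel_size, dilation = 7, 1

# One attention weight per neighbor in the kernel_size x kernel_size window.
attention_probs = torch.randn(batch, heads, height, width, kernel_size**2).softmax(dim=-1)
value_layer = torch.randn(batch, heads, height, width, head_dim)

# natten>=0.14.6 takes kernel_size explicitly, before dilation;
# natten<=0.14.5 omitted it: natten2dav(attention_probs, value_layer, dilation).
context_layer = natten2dav(attention_probs, value_layer, kernel_size, dilation)
assert context_layer.shape == value_layer.shape

Pinning natten>=0.14.6 in setup.py and dependency_versions_table.py keeps the installed library in sync with the updated call sites, so users cannot end up with the old three-argument signature.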