Fix natten (#22229)

* Add kernel size to NATTEN's QK arguments.

The new NATTEN 0.14.5 supports PyTorch 2.0, but it also adds an extra
argument to the QK operation so that relative positional biases (RPBs)
become optional.

Because the call signature changed, the existing NATTEN tests fail
against 0.14.5.

This commit re-enables NATTEN in the CircleCI config and passes the new
kernel_size argument so the tests pass again; see the usage sketch after
the diffs below.

* Force NATTEN >= 0.14.5
Ali Hassani 2023-03-17 11:07:55 -04:00 committed by GitHub
parent 074490b2c2
commit 3028b20a71
5 changed files with 5 additions and 6 deletions

@@ -374,8 +374,7 @@ exotic_models_job = CircleCIJob(
         "pip install 'git+https://github.com/facebookresearch/detectron2.git'",
         "sudo apt install tesseract-ocr",
         "pip install pytesseract",
-        # wait until natten is ready for torch 2.0.0
-        # "pip install natten",
+        "pip install natten",
     ],
     tests_to_run=[
         "tests/models/*layoutlmv*",

@@ -129,7 +129,7 @@ _deps = [
     "keras-nlp>=0.3.1",
     "librosa",
     "nltk",
-    "natten>=0.14.4",
+    "natten>=0.14.5",
     "numpy>=1.17",
     "onnxconverter-common",
     "onnxruntime-tools>=1.4.2",

@@ -35,7 +35,7 @@ deps = {
     "keras-nlp": "keras-nlp>=0.3.1",
     "librosa": "librosa",
     "nltk": "nltk",
-    "natten": "natten>=0.14.4",
+    "natten": "natten>=0.14.5",
     "numpy": "numpy>=1.17",
     "onnxconverter-common": "onnxconverter-common",
     "onnxruntime-tools": "onnxruntime-tools>=1.4.2",

@@ -347,7 +347,7 @@ class NeighborhoodAttention(nn.Module):
         query_layer = query_layer / math.sqrt(self.attention_head_size)
         # Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.dilation)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, self.dilation)
         # Normalize the attention scores to probabilities.
         attention_probs = nn.functional.softmax(attention_scores, dim=-1)

@@ -339,7 +339,7 @@ class NeighborhoodAttention(nn.Module):
         query_layer = query_layer / math.sqrt(self.attention_head_size)
         # Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, 1)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, 1)
         # Normalize the attention scores to probabilities.
         attention_probs = nn.functional.softmax(attention_scores, dim=-1)
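
For reference, a minimal sketch of calling the updated op directly. It
assumes natten>=0.14.5 is installed; the tensor shapes and values here are
illustrative, not part of the patch.

import torch
from natten.functional import natten2dqkrpb

# Illustrative shapes: NATTEN's 2D ops take (batch, heads, height, width, head_dim).
batch, heads, height, width, head_dim = 1, 2, 8, 8, 16
kernel_size, dilation = 3, 1
query = torch.randn(batch, heads, height, width, head_dim)
key = torch.randn(batch, heads, height, width, head_dim)
# Relative positional bias table: one (2k - 1) x (2k - 1) grid per head.
rpb = torch.randn(heads, 2 * kernel_size - 1, 2 * kernel_size - 1)

# As of 0.14.5, kernel_size is passed explicitly before dilation; earlier
# releases took natten2dqkrpb(query, key, rpb, dilation), as the diffs show.
attention_scores = natten2dqkrpb(query, key, rpb, kernel_size, dilation)
attention_probs = torch.softmax(attention_scores, dim=-1)

The explicit kernel_size is presumably what makes rpb optional in 0.14.5:
with no bias table to read the neighborhood size from, the op needs the
kernel size as its own argument.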