mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Fix natten (#22229)
* Add kernel size to NATTEN's QK arguments. The new NATTEN 0.14.5 supports PyTorch 2.0, but also adds an additional argument to the QK operation to allow optional RPBs. This ends up failing NATTEN tests. This commit adds NATTEN back to circleci and adds the arguments to get it working again. * Force NATTEN >= 0.14.5
This commit is contained in:
parent
074490b2c2
commit
3028b20a71
5 changed files with 5 additions and 6 deletions
|
|
@@ -374,8 +374,7 @@ exotic_models_job = CircleCIJob(
|
|||
"pip install 'git+https://github.com/facebookresearch/detectron2.git'",
|
||||
"sudo apt install tesseract-ocr",
|
||||
"pip install pytesseract",
|
||||
-                # wait until natten is ready for torch 2.0.0
-                # "pip install natten",
+                "pip install natten",
|
||||
],
|
||||
tests_to_run=[
|
||||
"tests/models/*layoutlmv*",
|
||||
|
|
|
|||
2
setup.py
2
setup.py
|
|
@@ -129,7 +129,7 @@ _deps = [
|
|||
"keras-nlp>=0.3.1",
|
||||
"librosa",
|
||||
"nltk",
|
||||
"natten>=0.14.4",
|
||||
"natten>=0.14.5",
|
||||
"numpy>=1.17",
|
||||
"onnxconverter-common",
|
||||
"onnxruntime-tools>=1.4.2",
|
||||
|
|
|
|||
|
|
@@ -35,7 +35,7 @@ deps = {
|
|||
"keras-nlp": "keras-nlp>=0.3.1",
|
||||
"librosa": "librosa",
|
||||
"nltk": "nltk",
|
||||
"natten": "natten>=0.14.4",
|
||||
"natten": "natten>=0.14.5",
|
||||
"numpy": "numpy>=1.17",
|
||||
"onnxconverter-common": "onnxconverter-common",
|
||||
"onnxruntime-tools": "onnxruntime-tools>=1.4.2",
|
||||
|
|
|
|||
|
|
@@ -347,7 +347,7 @@ class NeighborhoodAttention(nn.Module):
|
|||
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
||||
|
||||
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
||||
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.dilation)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, self.dilation)
|
||||
|
||||
# Normalize the attention scores to probabilities.
|
||||
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
||||
|
|
|
|||
|
|
@@ -339,7 +339,7 @@ class NeighborhoodAttention(nn.Module):
|
|||
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
||||
|
||||
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
||||
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, 1)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, 1)
|
||||
|
||||
# Normalize the attention scores to probabilities.
|
||||
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
||||
|
|
|
|||
Loading…
Reference in a new issue