From 91487cbb8e0a0dfed66c64359e76deb0bfee2557 Mon Sep 17 00:00:00 2001
From: Iz Beltagy
Date: Fri, 29 May 2020 04:12:35 -0700
Subject: [PATCH] [Longformer] fix model name in examples (#4653)

* fix longformer model names in examples

* a better name for the notebook
---
 notebooks/README.md                      |  2 +-
 src/transformers/modeling_longformer.py | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/notebooks/README.md b/notebooks/README.md
index 6ea150a77..5bc63df2d 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -30,5 +30,5 @@ Pull Request so it can be included under the Community notebooks.
 | [Fine-tune BART for Summarization](https://github.com/ohmeow/ohmeow_website/blob/master/_notebooks/2020-05-23-text-generation-with-blurr.ipynb) | How to fine-tune BART for summarization with fastai using blurr | [Wayde Gilliam](https://ohmeow.com/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ohmeow/ohmeow_website/blob/master/_notebooks/2020-05-23-text-generation-with-blurr.ipynb) |
 | [Fine-tune a pre-trained Transformer on anyone's tweets](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) | How to generate tweets in the style of your favorite Twitter account by fine-tuning a GPT-2 model | [Boris Dayma](https://github.com/borisdayma) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) |
 | [A Step by Step Guide to Tracking Hugging Face Model Performance](https://colab.research.google.com/drive/1NEiqNPhiouu2pPwDAVeFoN4-vTYMz9F8) | A quick tutorial for training NLP models with HuggingFace and visualizing their performance with Weights & Biases | [Jack Morris](https://github.com/jxmorris12) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1NEiqNPhiouu2pPwDAVeFoN4-vTYMz9F8) |
-| [Pretrain Longformer](https://github.com/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) | How to convert existing pretrained models into their Long version | [Iz Beltagy](https://beltagy.net) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) |
+| [Pretrain Longformer](https://github.com/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) | How to build a "long" version of existing pretrained models | [Iz Beltagy](https://beltagy.net) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) |
 | [Fine-tune Longformer for QA](https://github.com/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) | How to fine-tune a Longformer model for a QA task | [Suraj Patil](https://github.com/patil-suraj) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) |
diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py
index fcfbeecc9..70e5fbf90 100644
--- a/src/transformers/modeling_longformer.py
+++ b/src/transformers/modeling_longformer.py
@@ -572,8 +572,8 @@ class LongformerModel(RobertaModel):
         import torch
         from transformers import LongformerModel, LongformerTokenizer

-        model = LongformerModel.from_pretrained('longformer-base-4096')
-        tokenizer = LongformerTokenizer.from_pretrained('longformer-base-4096')
+        model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
+        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

         SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000)  # long input document
         input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1
@@ -681,8 +681,8 @@ class LongformerForMaskedLM(BertPreTrainedModel):
         import torch
         from transformers import LongformerForMaskedLM, LongformerTokenizer

-        model = LongformerForMaskedLM.from_pretrained('longformer-base-4096')
-        tokenizer = LongformerTokenizer.from_pretrained('longformer-base-4096')
+        model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
+        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

         SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000)  # long input document
         input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1
@@ -769,8 +769,8 @@ class LongformerForSequenceClassification(BertPreTrainedModel):
         from transformers import LongformerTokenizer, LongformerForSequenceClassification
         import torch

-        tokenizer = LongformerTokenizer.from_pretrained('longformer-base-4096')
-        model = LongformerForSequenceClassification.from_pretrained('longformer-base-4096')
+        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
+        model = LongformerForSequenceClassification.from_pretrained('allenai/longformer-base-4096')
         input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
         labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
         outputs = model(input_ids, labels=labels)
@@ -909,8 +909,8 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
         from transformers import LongformerTokenizer, LongformerForQuestionAnswering
         import torch

-        tokenizer = LongformerTokenizer.from_pretrained("longformer-large-4096-finetuned-triviaqa")
-        model = LongformerForQuestionAnswering.from_pretrained("longformer-large-4096-finetuned-triviaqa")
+        tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
+        model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")

         question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
         encoding = tokenizer.encode_plus(question, text, return_tensors="pt")
@@ -1031,8 +1031,8 @@ class LongformerForTokenClassification(BertPreTrainedModel):
         from transformers import LongformerTokenizer, LongformerForTokenClassification
         import torch

-        tokenizer = LongformerTokenizer.from_pretrained('longformer-base-4096')
-        model = LongformerForTokenClassification.from_pretrained('longformer-base-4096')
+        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
+        model = LongformerForTokenClassification.from_pretrained('allenai/longformer-base-4096')
         input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
         labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
         outputs = model(input_ids, labels=labels)
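
---

Note: every hunk above makes the same change, prefixing the model hub
identifier with the `allenai/` organization. A minimal standalone smoke test
of the corrected names, following the docstring pattern from the hunks --
a sketch only, assuming network access and a transformers release with
Longformer support from around this commit (where the attention_mask value 2
marks global attention):

    import torch
    from transformers import LongformerModel, LongformerTokenizer

    # Organization-prefixed identifier introduced by this patch; the bare
    # 'longformer-base-4096' name is what the old examples used.
    model_name = "allenai/longformer-base-4096"

    tokenizer = LongformerTokenizer.from_pretrained(model_name)
    model = LongformerModel.from_pretrained(model_name)

    text = " ".join(["Hello world! "] * 1000)  # long input document
    input_ids = torch.tensor(tokenizer.encode(text)).unsqueeze(0)  # batch of size 1

    # Local attention everywhere; global attention on the first token
    # (value 2 signals global attention in this version of the library).
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
    attention_mask[:, 0] = 2

    outputs = model(input_ids, attention_mask=attention_mask)
    sequence_output = outputs[0]  # final hidden states, one vector per token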