From f1fe18465d8c4ee3f5710cdfd7de387a1d136f6b Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 5 Jun 2020 16:41:46 -0400
Subject: [PATCH] Use labels to remove deprecation warnings (#4807)

---
 tests/test_modeling_albert.py     | 4 ++--
 tests/test_modeling_bart.py       | 6 +++---
 tests/test_modeling_bert.py       | 8 ++++----
 tests/test_modeling_distilbert.py | 2 +-
 tests/test_modeling_electra.py    | 2 +-
 tests/test_modeling_gpt2.py       | 2 +-
 tests/test_modeling_longformer.py | 4 ++--
 tests/test_modeling_openai.py     | 2 +-
 tests/test_modeling_roberta.py    | 2 +-
 tests/test_modeling_t5.py         | 2 +-
 10 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py
index f1352338b..3c4192823 100644
--- a/tests/test_modeling_albert.py
+++ b/tests/test_modeling_albert.py
@@ -162,7 +162,7 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
             input_ids,
             attention_mask=input_mask,
             token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
+            labels=token_labels,
             sentence_order_label=sequence_labels,
         )
         result = {
@@ -183,7 +183,7 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
         model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
+            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
         )
         result = {
             "loss": loss,
diff --git a/tests/test_modeling_bart.py b/tests/test_modeling_bart.py
index 0c2be07b9..366b7de2f 100644
--- a/tests/test_modeling_bart.py
+++ b/tests/test_modeling_bart.py
@@ -296,7 +296,7 @@ class BartTranslationTests(unittest.TestCase):
         lm_model = BartForConditionalGeneration(config).to(torch_device)
         context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
         summary = torch.Tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]]).long().to(torch_device)
-        loss, logits, enc_features = lm_model(input_ids=context, decoder_input_ids=summary, lm_labels=summary)
+        loss, logits, enc_features = lm_model(input_ids=context, decoder_input_ids=summary, labels=summary)
         expected_shape = (*summary.shape, config.vocab_size)
         self.assertEqual(logits.shape, expected_shape)

@@ -361,7 +361,7 @@ class BartHeadTests(unittest.TestCase):
         lm_labels = ids_tensor([batch_size, input_ids.shape[1]], self.vocab_size).to(torch_device)
         lm_model = BartForConditionalGeneration(config)
         lm_model.to(torch_device)
-        loss, logits, enc_features = lm_model(input_ids=input_ids, lm_labels=lm_labels)
+        loss, logits, enc_features = lm_model(input_ids=input_ids, labels=lm_labels)
         expected_shape = (batch_size, input_ids.shape[1], config.vocab_size)
         self.assertEqual(logits.shape, expected_shape)
         self.assertIsInstance(loss.item(), float)
@@ -381,7 +381,7 @@ class BartHeadTests(unittest.TestCase):
         lm_model = BartForConditionalGeneration(config).to(torch_device)
         context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
         summary = torch.Tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]]).long().to(torch_device)
-        loss, logits, enc_features = lm_model(input_ids=context, decoder_input_ids=summary, lm_labels=summary)
+        loss, logits, enc_features = lm_model(input_ids=context, decoder_input_ids=summary, labels=summary)
         expected_shape = (*summary.shape, config.vocab_size)
         self.assertEqual(logits.shape, expected_shape)

diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py
index fc994c46b..276ed056c 100644
--- a/tests/test_modeling_bert.py
+++ b/tests/test_modeling_bert.py
@@ -218,7 +218,7 @@ class BertModelTester:
         model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
+            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
         )
         result = {
             "loss": loss,
@@ -248,7 +248,7 @@ class BertModelTester:
             input_ids,
             attention_mask=input_mask,
             token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
+            labels=token_labels,
             encoder_hidden_states=encoder_hidden_states,
             encoder_attention_mask=encoder_attention_mask,
         )
@@ -256,7 +256,7 @@ class BertModelTester:
             input_ids,
             attention_mask=input_mask,
             token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
+            labels=token_labels,
             encoder_hidden_states=encoder_hidden_states,
         )
         result = {
@@ -294,7 +294,7 @@ class BertModelTester:
             input_ids,
             attention_mask=input_mask,
             token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
+            labels=token_labels,
             next_sentence_label=sequence_labels,
         )
         result = {
diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py
index 7b9f20fed..a90288495 100644
--- a/tests/test_modeling_distilbert.py
+++ b/tests/test_modeling_distilbert.py
@@ -151,7 +151,7 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
         model = DistilBertForMaskedLM(config=config)
         model.to(torch_device)
         model.eval()
-        loss, prediction_scores = model(input_ids, attention_mask=input_mask, masked_lm_labels=token_labels)
+        loss, prediction_scores = model(input_ids, attention_mask=input_mask, labels=token_labels)
         result = {
             "loss": loss,
             "prediction_scores": prediction_scores,
diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py
index 263967674..22254a81a 100644
--- a/tests/test_modeling_electra.py
+++ b/tests/test_modeling_electra.py
@@ -180,7 +180,7 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
         model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
+            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
         )
         result = {
             "loss": loss,
diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py
index 4f4dedb32..01228963a 100644
--- a/tests/test_modeling_gpt2.py
+++ b/tests/test_modeling_gpt2.py
@@ -268,7 +268,7 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
             "mc_token_ids": mc_token_ids,
             "attention_mask": multiple_choice_input_mask,
             "token_type_ids": multiple_choice_token_type_ids,
-            "lm_labels": multiple_choice_inputs_ids,
+            "labels": multiple_choice_inputs_ids,
         }

         loss, lm_logits, mc_logits, _ = model(**inputs)
diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py
index 51429c845..0e80c0c86 100644
--- a/tests/test_modeling_longformer.py
+++ b/tests/test_modeling_longformer.py
@@ -164,7 +164,7 @@ class LongformerModelTester(object):
         model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
+            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
         )
         result = {
             "loss": loss,
@@ -361,7 +361,7 @@ class LongformerModelIntegrationTest(unittest.TestCase):
             [[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=torch.long, device=torch_device
         )  # long input

-        loss, prediction_scores = model(input_ids, masked_lm_labels=input_ids)
+        loss, prediction_scores = model(input_ids, labels=input_ids)

         expected_loss = torch.tensor(0.0620, device=torch_device)
         expected_prediction_scores_sum = torch.tensor(-6.1599e08, device=torch_device)
diff --git a/tests/test_modeling_openai.py b/tests/test_modeling_openai.py
index 4d4191b4a..d56f4627a 100644
--- a/tests/test_modeling_openai.py
+++ b/tests/test_modeling_openai.py
@@ -169,7 +169,7 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
         model.to(torch_device)
         model.eval()

-        loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, lm_labels=input_ids)
+        loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

         result = {"loss": loss, "lm_logits": lm_logits}

diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py
index d739cfda7..ac9e9396d 100644
--- a/tests/test_modeling_roberta.py
+++ b/tests/test_modeling_roberta.py
@@ -155,7 +155,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
+            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
         )
         result = {
             "loss": loss,
diff --git a/tests/test_modeling_t5.py b/tests/test_modeling_t5.py
index 5c7544f56..766bda3bc 100644
--- a/tests/test_modeling_t5.py
+++ b/tests/test_modeling_t5.py
@@ -206,7 +206,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
             input_ids=input_ids,
             decoder_input_ids=decoder_input_ids,
             decoder_attention_mask=decoder_attention_mask,
-            lm_labels=lm_labels,
+            labels=lm_labels,
         )
         loss, prediction_scores, _, _ = outputs
         self.parent.assertEqual(len(outputs), 4)