mirror of https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00

update sequencesummary module

This commit is contained in:
parent d5481cbe1b
commit d0efbd3cd1

5 changed files with 15 additions and 2 deletions
@@ -3,6 +3,7 @@ source=pytorch_transformers
 omit =
     # skip convertion scripts from testing for now
     */convert_*
+    */__main__.py
 [report]
 exclude_lines =
     pragma: no cover
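
For context: coverage.py's omit entries are fnmatch-style globs matched against measured file paths. A minimal sketch (not part of the commit; the file paths are illustrative) of which paths the patterns above exclude:

from fnmatch import fnmatch

# Illustrative paths; only the glob behaviour matters here.
paths = [
    "pytorch_transformers/__main__.py",                            # excluded by the new */__main__.py
    "pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py",  # excluded by */convert_*
    "pytorch_transformers/modeling_gpt2.py",                       # still measured
]
for path in paths:
    omitted = fnmatch(path, "*/convert_*") or fnmatch(path, "*/__main__.py")
    print(path, "->", "omitted" if omitted else "measured")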
@@ -48,7 +48,7 @@ class ExamplesTests(unittest.TestCase):
         testargs = ["run_glue.py", "--data_dir=./examples/tests_samples/MRPC/",
                     "--task_name=mrpc", "--do_train", "--do_eval", "--output_dir=./examples/tests_samples/temp_dir",
                     "--train_batch_size=4", "--eval_batch_size=2", "--num_train_epochs=2.0", "--overwrite_output_dir"]
-        model_name = "--model_name=xlnet-large-cased"
+        model_name = "--model_name=bert-base-uncased"
         with patch.object(sys, 'argv', testargs + [model_name]):
             result = run_glue.main()
             for value in result.values():
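
The test drives run_glue.main() by patching sys.argv, which argparse reads by default. A self-contained sketch of the same technique (main() here is a stand-in for the real script):

import sys
import argparse
from unittest.mock import patch

def main():
    # Stand-in for run_glue.main(): argparse reads the patched sys.argv.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", required=True)
    args = parser.parse_args()
    return {"acc": 0.75}  # run_glue.main() returns a dict of metrics like this

testargs = ["run_glue.py", "--model_name=bert-base-uncased"]
with patch.object(sys, "argv", testargs):
    result = main()
for value in result.values():
    assert value >= 0.0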
@@ -119,9 +119,12 @@ class GPT2Config(PretrainedConfig):
                  layer_norm_epsilon=1e-5,
                  initializer_range=0.02,
                  predict_special_tokens=True,
+
                  num_labels=1,
                  summary_type='token_ids',
                  summary_use_proj=True,
                  summary_activation=None,
+                 summary_proj_to_labels=True,
+                 summary_first_dropout=0.1,
                  **kwargs
                  ):

@@ -168,10 +171,13 @@ class GPT2Config(PretrainedConfig):
             self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
             self.predict_special_tokens = predict_special_tokens
+
             self.num_labels = num_labels
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
             self.summary_activation = summary_activation
+            self.summary_first_dropout = summary_first_dropout
+            self.summary_proj_to_labels = summary_proj_to_labels
         else:
             raise ValueError(
                 "First argument must be either a vocabulary size (int)"
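
The new summary_proj_to_labels and summary_first_dropout fields are consumed by the SequenceSummary module this commit updates. A hedged sketch (assuming the semantics suggested by the names, not the exact upstream implementation) of how such a head can read them:

import torch
import torch.nn as nn

class SequenceSummarySketch(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.summary = nn.Identity()
        if config.summary_use_proj:
            # summary_proj_to_labels: project to num_labels rather than
            # back to the hidden size.
            out_dim = config.num_labels if config.summary_proj_to_labels else config.n_embd
            self.summary = nn.Linear(config.n_embd, out_dim)
        self.activation = nn.Tanh() if config.summary_activation == 'tanh' else nn.Identity()
        # summary_first_dropout: dropout applied before the projection.
        self.first_dropout = nn.Dropout(config.summary_first_dropout)

    def forward(self, hidden_states, token_ids):
        # summary_type='token_ids': gather one hidden state per sequence.
        idx = token_ids.unsqueeze(-1).unsqueeze(-1).expand(-1, 1, hidden_states.size(-1))
        output = hidden_states.gather(1, idx).squeeze(1)  # (batch, hidden)
        output = self.first_dropout(output)
        return self.activation(self.summary(output))

With summary_proj_to_labels=True and num_labels=1 (the defaults in the diff above), such a head yields one scalar per sequence, which fits the multiple-choice scoring in the double-heads models tested below.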
@@ -147,9 +147,12 @@ class OpenAIGPTConfig(PretrainedConfig):
                  layer_norm_epsilon=1e-5,
                  initializer_range=0.02,
                  predict_special_tokens=True,
+
                  num_labels=1,
                  summary_type='token_ids',
                  summary_use_proj=True,
                  summary_activation=None,
+                 summary_proj_to_labels=True,
+                 summary_first_dropout=0.1,
                  **kwargs
                  ):

@@ -199,10 +202,13 @@ class OpenAIGPTConfig(PretrainedConfig):
             self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
             self.predict_special_tokens = predict_special_tokens
+
             self.num_labels = num_labels
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
             self.summary_activation = summary_activation
+            self.summary_first_dropout = summary_first_dropout
+            self.summary_proj_to_labels = summary_proj_to_labels
         else:
             raise ValueError(
                 "First argument must be either a vocabulary size (int)"
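
OpenAIGPTConfig mirrors the GPT2Config change exactly. Illustrative usage, assuming the constructor defaults shown in the diff above are in effect:

from pytorch_transformers import OpenAIGPTConfig

config = OpenAIGPTConfig(
    summary_proj_to_labels=True,  # project the summary to num_labels
    summary_first_dropout=0.1,    # dropout before the projection
)
assert config.summary_proj_to_labels is True
assert config.summary_first_dropout == 0.1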
@@ -396,7 +396,7 @@ class GPTModelTester(object):
         model = self.double_head_model_class(config)
         model.eval()
         outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
-                        token_type_ids=token_type_ids, position_ids=position_ids)
+                       token_type_ids=token_type_ids, position_ids=position_ids)
         lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
         loss = [lm_loss, mc_loss]
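
The test calls model.eval() before the forward pass; now that the summary head carries summary_first_dropout, this is what keeps the checked outputs deterministic. A minimal illustration:

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.1)  # e.g. the summary head's new summary_first_dropout
x = torch.ones(8)

drop.train()
# Training mode: random elements are zeroed, the rest rescaled by 1/(1-p).
print(drop(x))

drop.eval()
# Eval mode: dropout is the identity, so test outputs are deterministic.
assert torch.equal(drop(x), x)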