From 7fad617dc1fc681a7f5da5e0172c8b83f4bf0024 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 10 Jul 2020 17:31:02 -0400
Subject: [PATCH] Document model outputs (#5673)

* Document model outputs

* Update docs/source/main_classes/output.rst

Co-authored-by: Lysandre Debut
---
 docs/source/index.rst                   |   1 +
 docs/source/main_classes/output.rst     | 141 ++++++++++++++++++++++++
 docs/source/model_doc/albert.rst        |   7 ++
 docs/source/model_doc/bert.rst          |   7 ++
 docs/source/model_doc/dpr.rst           |  13 +++
 docs/source/model_doc/electra.rst       |   7 ++
 docs/source/model_doc/gpt.rst           |   7 ++
 docs/source/model_doc/gpt2.rst          |   7 ++
 docs/source/model_doc/mobilebert.rst    |   7 ++
 docs/source/model_doc/transformerxl.rst |  10 ++
 docs/source/model_doc/xlm.rst           |   8 ++
 docs/source/model_doc/xlnet.rst         |  25 +++++
 src/transformers/file_utils.py          |  19 +++-
 src/transformers/modeling_albert.py     |   4 +-
 src/transformers/modeling_bert.py       |   4 +-
 src/transformers/modeling_electra.py    |   4 +-
 src/transformers/modeling_mobilebert.py |   4 +-
 src/transformers/modeling_outputs.py    |   9 +-
 18 files changed, 267 insertions(+), 17 deletions(-)
 create mode 100644 docs/source/main_classes/output.rst

diff --git a/docs/source/index.rst b/docs/source/index.rst
index a84ccd0a4..bcc46a01d 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -173,6 +173,7 @@ conversion utilities for the following models:
    :caption: Package Reference
 
    main_classes/configuration
+   main_classes/output
    main_classes/model
    main_classes/tokenizer
    main_classes/pipelines
diff --git a/docs/source/main_classes/output.rst b/docs/source/main_classes/output.rst
new file mode 100644
index 000000000..fe43c8e59
--- /dev/null
+++ b/docs/source/main_classes/output.rst
@@ -0,0 +1,141 @@
+Model outputs
+-------------
+
+PyTorch models have outputs that are instances of subclasses of :class:`~transformers.file_utils.ModelOutput`. Those
+are data structures containing all the information returned by the model, but they can also be used as tuples or
+dictionaries.
+
+Let's see how this looks in an example:
+
+.. code-block::
+
+    from transformers import BertTokenizer, BertForSequenceClassification
+    import torch
+
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+
+    inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
+    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+    outputs = model(**inputs, labels=labels)
+
+The ``outputs`` object is a :class:`~transformers.modeling_outputs.SequenceClassifierOutput`. As we can see in the
+documentation of that class below, it has an optional ``loss``, a ``logits``, an optional ``hidden_states`` and an
+optional ``attentions`` attribute. Here we have the ``loss`` since we passed along ``labels``, but we don't have
+``hidden_states`` and ``attentions`` because we didn't pass ``output_hidden_states=True`` or
+``output_attentions=True``.
+
+You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you
+will get ``None``. Here, for instance, ``outputs.loss`` is the loss computed by the model, and ``outputs.attentions``
+is ``None``.
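+
+For instance (a minimal sketch continuing the example above), we can check which attributes were actually returned:
+
+.. code-block::
+
+    print(outputs.loss)                # a scalar tensor, since we passed labels
+    print(outputs.attentions is None)  # True, since we didn't pass output_attentions=True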
+
+When considering our ``outputs`` object as a tuple, it only considers the attributes that don't have ``None`` values.
+Here, for instance, it has two elements, ``loss`` then ``logits``, so
+
+.. code-block::
+
+    outputs[:2]
+
+will return the tuple ``(outputs.loss, outputs.logits)``.
+
+When considering our ``outputs`` object as a dictionary, it only considers the attributes that don't have ``None``
+values. Here, for instance, it has two keys that are ``loss`` and ``logits``.
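+
+Both views are also available explicitly through the ``to_tuple()`` and ``to_dict()`` methods of
+:class:`~transformers.file_utils.ModelOutput` (a minimal sketch reusing the ``outputs`` object above):
+
+.. code-block::
+
+    outputs.to_tuple()          # (loss, logits) -- only the non-None attributes
+    outputs.to_dict()           # {'loss': ..., 'logits': ...}
+    outputs["logits"]           # string indexing works like a dictionary lookup
+    outputs[0] is outputs.loss  # True: integer indexing works like a tuple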
+
+We document here the generic model outputs that are used by more than one model type. Specific output types are
+documented on their corresponding model page.
+
+``ModelOutput``
+~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.file_utils.ModelOutput
+    :members:
+
+``BaseModelOutput``
+~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.BaseModelOutput
+    :members:
+
+``BaseModelOutputWithPooling``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.BaseModelOutputWithPooling
+    :members:
+
+``BaseModelOutputWithPast``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.BaseModelOutputWithPast
+    :members:
+
+``Seq2SeqModelOutput``
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.Seq2SeqModelOutput
+    :members:
+
+``CausalLMOutput``
+~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.CausalLMOutput
+    :members:
+
+``CausalLMOutputWithPast``
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.CausalLMOutputWithPast
+    :members:
+
+``MaskedLMOutput``
+~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.MaskedLMOutput
+    :members:
+
+``Seq2SeqLMOutput``
+~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.Seq2SeqLMOutput
+    :members:
+
+``NextSentencePredictorOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.NextSentencePredictorOutput
+    :members:
+
+``SequenceClassifierOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.SequenceClassifierOutput
+    :members:
+
+``Seq2SeqSequenceClassifierOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.Seq2SeqSequenceClassifierOutput
+    :members:
+
+``MultipleChoiceModelOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.MultipleChoiceModelOutput
+    :members:
+
+``TokenClassifierOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.TokenClassifierOutput
+    :members:
+
+``QuestionAnsweringModelOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.QuestionAnsweringModelOutput
+    :members:
+
+``Seq2SeqQuestionAnsweringModelOutput``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_outputs.Seq2SeqQuestionAnsweringModelOutput
+    :members:
diff --git a/docs/source/model_doc/albert.rst b/docs/source/model_doc/albert.rst
index 8bd0c3e8f..ab382a27c 100644
--- a/docs/source/model_doc/albert.rst
+++ b/docs/source/model_doc/albert.rst
@@ -47,6 +47,13 @@ AlbertTokenizer
       create_token_type_ids_from_sequences, save_vocabulary
 
 
+Albert specific outputs
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_albert.AlbertForPretrainingOutput
+    :members:
+
+
 AlbertModel
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/bert.rst b/docs/source/model_doc/bert.rst
index b77a241a8..cbc1c8aa7 100644
--- a/docs/source/model_doc/bert.rst
+++ b/docs/source/model_doc/bert.rst
@@ -59,6 +59,13 @@ BertTokenizerFast
     :members:
 
 
+Bert specific outputs
+~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_bert.BertForPretrainingOutput
+    :members:
+
+
 BertModel
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/dpr.rst b/docs/source/model_doc/dpr.rst
index 84b0527c2..a77d3868b 100644
--- a/docs/source/model_doc/dpr.rst
+++ b/docs/source/model_doc/dpr.rst
@@ -69,6 +69,19 @@ DPRReaderTokenizerFast
     :members:
 
 
+DPR specific outputs
+~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_dpr.DPRContextEncoderOutput
+    :members:
+
+.. autoclass:: transformers.modeling_dpr.DPRQuestionEncoderOutput
+    :members:
+
+.. autoclass:: transformers.modeling_dpr.DPRReaderOutput
+    :members:
+
+
 DPRContextEncoder
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/electra.rst b/docs/source/model_doc/electra.rst
index 431b4f271..895ca9dde 100644
--- a/docs/source/model_doc/electra.rst
+++ b/docs/source/model_doc/electra.rst
@@ -71,6 +71,13 @@ ElectraTokenizerFast
     :members:
 
 
+Electra specific outputs
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_electra.ElectraForPretrainingOutput
+    :members:
+
+
 ElectraModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/gpt.rst b/docs/source/model_doc/gpt.rst
index 4c54dee70..39c5fe269 100644
--- a/docs/source/model_doc/gpt.rst
+++ b/docs/source/model_doc/gpt.rst
@@ -71,6 +71,13 @@ OpenAIGPTTokenizerFast
     :members:
 
 
+OpenAI specific outputs
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_openai.OpenAIGPTDoubleHeadsModelOutput
+    :members:
+
+
 OpenAIGPTModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/gpt2.rst b/docs/source/model_doc/gpt2.rst
index 45ac90ec2..3f1be1bb4 100644
--- a/docs/source/model_doc/gpt2.rst
+++ b/docs/source/model_doc/gpt2.rst
@@ -58,6 +58,13 @@ GPT2TokenizerFast
     :members:
 
 
+GPT2 specific outputs
+~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_gpt2.GPT2DoubleHeadsModelOutput
+    :members:
+
+
 GPT2Model
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/mobilebert.rst b/docs/source/model_doc/mobilebert.rst
index caec4af1f..ad3e0c206 100644
--- a/docs/source/model_doc/mobilebert.rst
+++ b/docs/source/model_doc/mobilebert.rst
@@ -56,6 +56,13 @@ MobileBertTokenizerFast
     :members:
 
 
+MobileBert specific outputs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_mobilebert.MobileBertForPretrainingOutput
+    :members:
+
+
 MobileBertModel
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/transformerxl.rst b/docs/source/model_doc/transformerxl.rst
index 336bfdcd6..dc1a63783 100644
--- a/docs/source/model_doc/transformerxl.rst
+++ b/docs/source/model_doc/transformerxl.rst
@@ -54,6 +54,16 @@ TransfoXLTokenizerFast
     :members:
 
 
+TransfoXL specific outputs
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_transfo_xl.TransfoXLModelOutput
+    :members:
+
+.. autoclass:: transformers.modeling_transfo_xl.TransfoXLLMHeadModelOutput
+    :members:
+
+
 TransfoXLModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/xlm.rst b/docs/source/model_doc/xlm.rst
index b043a1bec..cd14a77cb 100644
--- a/docs/source/model_doc/xlm.rst
+++ b/docs/source/model_doc/xlm.rst
@@ -46,6 +46,14 @@ XLMTokenizer
       :members: build_inputs_with_special_tokens, get_special_tokens_mask,
       create_token_type_ids_from_sequences, save_vocabulary
 
+
+XLM specific outputs
+~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_xlm.XLMForQuestionAnsweringOutput
+    :members:
+
+
 XLMModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/model_doc/xlnet.rst b/docs/source/model_doc/xlnet.rst
index 79faab8d5..bea589759 100644
--- a/docs/source/model_doc/xlnet.rst
+++ b/docs/source/model_doc/xlnet.rst
@@ -50,6 +50,31 @@ XLNetTokenizer
       create_token_type_ids_from_sequences, save_vocabulary
 
 
+XLNet specific outputs
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.modeling_xlnet.XLNetModelOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetLMHeadModelOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetForSequenceClassificationOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetForMultipleChoiceOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetForTokenClassificationOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetForQuestionAnsweringSimpleOutput
+    :members:
+
+.. autoclass:: transformers.modeling_xlnet.XLNetForQuestionAnsweringOutput
+    :members:
+
+
 XLNetModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py
index e4bcec6c8..33688b767 100644
--- a/src/transformers/file_utils.py
+++ b/src/transformers/file_utils.py
@@ -189,7 +189,7 @@ def add_end_docstrings(*docstr):
 
 RETURN_INTRODUCTION = r"""
     Returns:
-        :class:`~transformers.{output_type}` or :obj:`tuple(torch.FloatTensor)` (if ``return_tuple=True`` is passed or when ``config.return_tuple=True``) comprising various elements depending on the configuration (:class:`~transformers.{config_class}`) and inputs:
+        :class:`~{full_output_type}` or :obj:`tuple(torch.FloatTensor)` (if ``return_tuple=True`` is passed or when ``config.return_tuple=True``) comprising various elements depending on the configuration (:class:`~transformers.{config_class}`) and inputs:
 
 """
 
@@ -208,7 +208,8 @@ def _prepare_output_docstrings(output_type, config_class):
         docstrings = "\n".join(lines[(i + 1) :])
 
     # Add the return introduction
-    intro = RETURN_INTRODUCTION.format(output_type=output_type.__name__, config_class=config_class)
+    full_output_type = f"{output_type.__module__}.{output_type.__name__}"
+    intro = RETURN_INTRODUCTION.format(full_output_type=full_output_type, config_class=config_class)
     return intro + docstrings
 
 
@@ -857,14 +858,24 @@ def tf_required(func):
 
 class ModelOutput:
     """
-    Base class for all model outputs as dataclass. Has a ``__getitem__`` (to make it behave like a ``namedtuple``) that
-    will ignore ``None`` in the attributes.
+    Base class for all model outputs as dataclass. Has a ``__getitem__`` that allows indexing by integer or slice (like
+    a tuple) or by string (like a dictionary), ignoring the ``None`` attributes.
     """
 
     def to_tuple(self):
+        """
+        Converts :obj:`self` to a tuple.
+
+        Returns:
+            A tuple containing all the non-:obj:`None` attributes of :obj:`self`.
+        """
         return tuple(getattr(self, f) for f in self.__dataclass_fields__.keys() if getattr(self, f, None) is not None)
 
     def to_dict(self):
+        """
+        Converts :obj:`self` to a Python dictionary.
+
+        Returns:
+            A dictionary containing all the non-:obj:`None` attributes of :obj:`self`.
+        """
         return {f: getattr(self, f) for f in self.__dataclass_fields__.keys() if getattr(self, f, None) is not None}
 
     def __getitem__(self, i):
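To make the behavior documented in these two methods concrete, here is a minimal, self-contained sketch (``ToyOutput`` is a hypothetical class defined only for illustration, not part of the library):

.. code-block::

    from dataclasses import dataclass
    from typing import Optional

    import torch

    from transformers.file_utils import ModelOutput

    @dataclass
    class ToyOutput(ModelOutput):
        # Hypothetical output type, for illustration only.
        loss: Optional[torch.FloatTensor] = None
        logits: torch.FloatTensor = None

    out = ToyOutput(logits=torch.ones(1, 2))
    out.to_tuple()  # (tensor([[1., 1.]]),) -- loss is None, so it is skipped
    out.to_dict()   # {'logits': tensor([[1., 1.]])}
    out[0]          # integer indexing goes through to_tuple()
    out["logits"]   # string indexing goes through to_dict()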
+ """ return {f: getattr(self, f) for f in self.__dataclass_fields__.keys() if getattr(self, f, None) is not None} def __getitem__(self, i): diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index 9f7f4fb96..c1f1f73c7 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -410,9 +410,9 @@ class AlbertForPretrainingOutput(ModelOutput): Output type of :class:`~transformers.AlbertForPretrainingModel`. Args: - loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + loss (`optional`, returned when ``labels`` is provided, ``torch.FloatTensor`` of shape :obj:`(1,)`): Total loss as the sum of the masked language modeling loss and the next sequence prediction (classification) loss. - prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) + prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). sop_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`): Prediction scores of the next sequence prediction (classification) head (scores of True/False diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index 34aa5d167..d2f6c3710 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -585,9 +585,9 @@ class BertForPretrainingOutput(ModelOutput): Output type of :class:`~transformers.BertForPretrainingModel`. Args: - loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + loss (`optional`, returned when ``labels`` is provided, ``torch.FloatTensor`` of shape :obj:`(1,)`): Total loss as the sum of the masked language modeling loss and the next sequence prediction (classification) loss. - prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) + prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). seq_relationship_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`): Prediction scores of the next sequence prediction (classification) head (scores of True/False diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py index 71d02d7d9..267dbea7d 100644 --- a/src/transformers/modeling_electra.py +++ b/src/transformers/modeling_electra.py @@ -191,9 +191,9 @@ class ElectraForPretrainingOutput(ModelOutput): Output type of :class:`~transformers.ElectraForPretrainingModel`. Args: - loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + loss (`optional`, returned when ``labels`` is provided, ``torch.FloatTensor`` of shape :obj:`(1,)`): Total loss of the ELECTRA objective. - logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`) + logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`): Prediction scores of the head (scores for each token before SoftMax). 
         hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
             Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
diff --git a/src/transformers/modeling_mobilebert.py b/src/transformers/modeling_mobilebert.py
index cb52395ce..b62035f54 100644
--- a/src/transformers/modeling_mobilebert.py
+++ b/src/transformers/modeling_mobilebert.py
@@ -685,9 +685,9 @@ class MobileBertForPretrainingOutput(ModelOutput):
     Output type of :class:`~transformers.MobileBertForPretrainingModel`.
 
     Args:
-        loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
+        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided):
             Total loss as the sum of the masked language modeling loss and the next sequence prediction
             (classification) loss.
-        prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`)
+        prediction_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
             Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
         seq_relationship_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`):
             Prediction scores of the next sequence prediction (classification) head (scores of True/False
diff --git a/src/transformers/modeling_outputs.py b/src/transformers/modeling_outputs.py
index 0ef88f2cd..f9cf15c40 100644
--- a/src/transformers/modeling_outputs.py
+++ b/src/transformers/modeling_outputs.py
@@ -40,12 +40,11 @@ class BaseModelOutputWithPooling(ModelOutput):
     Args:
         last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
             Sequence of hidden-states at the output of the last layer of the model.
-        pooler_output (:obj:`torch.FloatTensor`: of shape :obj:`(batch_size, hidden_size)`):
-        pooler_output (:obj:`torch.FloatTensor`: of shape :obj:`(batch_size, hidden_size)`):
+        pooler_output (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, hidden_size)`):
             Last layer hidden-state of the first token of the sequence (classification token)
             further processed by a Linear layer and a Tanh activation function. The Linear
             layer weights are trained from the next sentence prediction (classification)
-            objective during pre-training.
+            objective during pretraining.
 
             This output is usually *not* a good summary
             of the semantic content of the input, you're often better with averaging or pooling
@@ -114,7 +113,7 @@ class Seq2SeqModelOutput(ModelOutput):
     last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
         Sequence of hidden-states at the output of the last layer of the decoder of the model.
 
-        If `decoder_past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size, 1, hidden_size)` is output.
+        If ``decoder_past_key_values`` is used only the last hidden-state of the sequences of shape :obj:`(batch_size, 1, hidden_size)` is output.
     decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
         List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).
@@ -337,7 +336,7 @@ class SequenceClassifierOutput(ModelOutput):
     Base class for outputs of sentence classification models.
 
     Args:
-        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided):
+        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
             Classification (or regression if config.num_labels==1) loss.
         logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
             Classification (or regression if config.num_labels==1) scores (before SoftMax).
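As a closing illustration of the ``labels`` behavior this last hunk documents, here is a minimal sketch reusing the ``model``, ``inputs`` and ``labels`` objects from the introductory example:

.. code-block::

    # Without labels, no loss is computed, so outputs.loss is None.
    outputs = model(**inputs)
    assert outputs.loss is None

    # With labels, the classification loss becomes the first non-None attribute.
    outputs = model(**inputs, labels=labels)
    assert outputs.loss is not None
    assert outputs[0] is outputs.loss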