diff --git a/docs/source/model_doc/auto.rst b/docs/source/model_doc/auto.rst index c3345ac4f..e13a9d0b5 100644 --- a/docs/source/model_doc/auto.rst +++ b/docs/source/model_doc/auto.rst @@ -1,109 +1,131 @@ -AutoModels +AutoClasses ----------- In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you -are supplying to the ``from_pretrained`` method. - +are supplying to the :obj:`from_pretrained()` method. AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path -to the pretrained weights/config/vocabulary: +to the pretrained weights/config/vocabulary. -Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create a class of the relevant -architecture (ex: ``model = AutoModel.from_pretrained('bert-base-cased')`` will create a instance of -:class:`~transformers.BertModel`). +Instantiating one of :class:`~transformers.AutoConfig`, :class:`~transformers.AutoModel`, and +:class:`~transformers.AutoTokenizer` will directly create a class of the relevant architecture. For instance -``AutoConfig`` -~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: python + + model = AutoModel.from_pretrained('bert-base-cased') + +will create a model that is an instance of :class:`~transformers.BertModel`. + +There is one class of :obj:`AutoModel` for each task, and for each backend (PyTorch or TensorFlow). + + +AutoConfig +~~~~~~~~~~ .. autoclass:: transformers.AutoConfig :members: -``AutoTokenizer`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +AutoTokenizer +~~~~~~~~~~~~~ .. autoclass:: transformers.AutoTokenizer :members: -``AutoModel`` -~~~~~~~~~~~~~~~~~~~~~ +AutoModel +~~~~~~~~~ .. autoclass:: transformers.AutoModel :members: -``AutoModelForPreTraining`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +AutoModelForPreTraining +~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.AutoModelForPreTraining :members: -``AutoModelWithLMHead`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +AutoModelWithLMHead +~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.AutoModelWithLMHead :members: -``AutoModelForSequenceClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +AutoModelForSequenceClassification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.AutoModelForSequenceClassification :members: -``AutoModelForQuestionAnswering`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +AutoModelForMultipleChoice +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.AutoModelForMultipleChoice + :members: + + +AutoModelForTokenClassification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.AutoModelForTokenClassification + :members: + + +AutoModelForQuestionAnswering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.AutoModelForQuestionAnswering :members: -``AutoModelForTokenClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autoclass:: transformers.AutoModelForTokenClassification - :members: -``TFAutoModel`` -~~~~~~~~~~~~~~~~~~~~~ +TFAutoModel +~~~~~~~~~~~ .. autoclass:: transformers.TFAutoModel :members: -``TFAutoModelForPreTraining`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TFAutoModelForPreTraining +~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.TFAutoModelForPreTraining :members: -``TFAutoModelWithLMHead`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TFAutoModelWithLMHead +~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: transformers.TFAutoModelWithLMHead :members: -``TFAutoModelForSequenceClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TFAutoModelForSequenceClassification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.TFAutoModelForSequenceClassification :members: -``TFAutoModelForQuestionAnswering`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TFAutoModelForMultipleChoice +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: transformers.TFAutoModelForQuestionAnswering +.. autoclass:: transformers.TFAutoModelForMultipleChoice :members: -``TFAutoModelForTokenClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TFAutoModelForTokenClassification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.TFAutoModelForTokenClassification :members: + + +TFAutoModelForQuestionAnswering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.TFAutoModelForQuestionAnswering + :members: diff --git a/src/transformers/configuration_auto.py b/src/transformers/configuration_auto.py index bf22aea28..a89916932 100644 --- a/src/transformers/configuration_auto.py +++ b/src/transformers/configuration_auto.py @@ -197,9 +197,7 @@ class AutoConfig: This is a generic configuration class that will be instantiated as one of the configuration classes of the library when created with the :meth:`~transformers.AutoConfig.from_pretrained` class method. - This method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -222,58 +220,77 @@ class AutoConfig: @classmethod @replace_list_option_in_docstrings() def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): - r""" Instantiates one of the configuration classes of the library - from a pre-trained model configuration. + r""" + Instantiate one of the configuration classes of the library from a pretrained model configuration. - The configuration class to instantiate is selected - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: + The configuration class to instantiate is selected based on the :obj:`model_type` property of the config + object that is loaded, or when it's missing, by falling back to using pattern matching on + :obj:`pretrained_model_name_or_path`: List options Args: - pretrained_model_name_or_path (:obj:`string`): - Is either: \ - - a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model configuration that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``. - - a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``. + pretrained_model_name_or_path (:obj:`str`): + Can be either: - cache_dir (:obj:`string`, optional, defaults to `None`): - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. 
- - force_download (:obj:`boolean`, optional, defaults to `False`): - Force to (re-)download the model weights and configuration files and override the cached versions if they exist. - - resume_download (:obj:`boolean`, optional, defaults to `False`): - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - - proxies (:obj:`Dict[str, str]`, optional, defaults to `None`): - A dictionary of proxy servers to use by protocol or endpoint, e.g.: :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. - The proxies are used on each request. See `the requests documentation `__ for usage. - - return_unused_kwargs (:obj:`boolean`, optional, defaults to `False`): - - If False, then this function returns just the final configuration object. - - If True, then this functions returns a tuple `(config, unused_kwargs)` where `unused_kwargs` is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: ie the part of kwargs which has not been used to update `config` and is otherwise ignored. - - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): key/value pairs with which to update the configuration object after loading. - - The values in kwargs of any keys which are configuration attributes will be used to override the loaded values. - - Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled by the `return_unused_kwargs` keyword parameter. + - A string with the `shortcut name` of a pretrained model configuration to load from cache or + download, e.g., ``bert-base-uncased``. + - A string with the `identifier name` of a pretrained model configuration that was user-uploaded to + our S3, e.g., ``dbmdz/bert-base-german-cased``. + - A path to a `directory` containing a configuration file saved using the + :meth:`~transformers.PretrainedConfig.save_pretrained` method, or the + :meth:`~transformers.PreTrainedModel.save_pretrained` method, e.g., ``./my_model_directory/``. + - A path or url to a saved configuration JSON `file`, e.g., + ``./my_model_directory/configuration.json``. + cache_dir (:obj:`str`, `optional`): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + force_download (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to force the (re-)download of the model weights and configuration files and override the + cached versions if they exist. + resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to delete incompletely received files. Will attempt to resume the download if such a + file exists. + proxies (:obj:`Dict[str, str]`, `optional`): + A dictionary of proxy servers to use by protocol or endpoint, e.g., + :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each + request. + return_unused_kwargs (:obj:`bool`, `optional`, defaults to :obj:`False`): + If :obj:`False`, then this function returns just the final configuration object. + If :obj:`True`, then this function returns a :obj:`Tuple(config, unused_kwargs)` where `unused_kwargs` + is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., + the part of ``kwargs`` which has not been used to update ``config`` and is otherwise ignored. + kwargs (additional keyword arguments, `optional`): + The values in kwargs of any keys which are configuration attributes will be used to override the loaded + values.
Behavior concerning key/value pairs whose keys are *not* configuration attributes is + controlled by the ``return_unused_kwargs`` keyword parameter. Examples:: - config = AutoConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - config = AutoConfig.from_pretrained('./test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')` - config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json') - config = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False) - assert config.output_attentions == True - config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, - foo=False, return_unused_kwargs=True) - assert config.output_attentions == True - assert unused_kwargs == {'foo': False} + >>> from transformers import AutoConfig + >>> # Download configuration from S3 and cache. + >>> config = AutoConfig.from_pretrained('bert-base-uncased') + + >>> # Download configuration from S3 (user-uploaded) and cache. + >>> config = AutoConfig.from_pretrained('dbmdz/bert-base-german-cased') + + >>> # If configuration file is in a directory (e.g., was saved using `save_pretrained('./test/saved_model/')`). + >>> config = AutoConfig.from_pretrained('./test/bert_saved_model/') + + >>> # Load a specific configuration file. + >>> config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json') + + >>> # Change some config attributes when loading a pretrained config. + >>> config = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False) + >>> config.output_attentions True + >>> config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False, return_unused_kwargs=True) + >>> config.output_attentions True + >>> unused_kwargs {'foo': False} """ config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) diff --git a/src/transformers/modeling_auto.py b/src/transformers/modeling_auto.py index bc2bf8330..fd2371cc8 100644 --- a/src/transformers/modeling_auto.py +++ b/src/transformers/modeling_auto.py @@ -50,6 +50,7 @@ from .configuration_auto import ( ) from .configuration_marian import MarianConfig from .configuration_utils import PretrainedConfig +from .file_utils import add_start_docstrings from .modeling_albert import ( AlbertForMaskedLM, AlbertForMultipleChoice, @@ -399,14 +400,97 @@ MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict( ) + +AUTO_MODEL_PRETRAINED_DOCSTRING = r""" + + The model class to instantiate is selected based on the :obj:`model_type` property of the config object + (either passed as an argument or loaded from :obj:`pretrained_model_name_or_path` if possible), or when it's + missing, by falling back to using pattern matching on :obj:`pretrained_model_name_or_path`: + + List options + + The model is set in evaluation mode by default using ``model.eval()`` (so for instance, dropout modules are + deactivated). To train the model, you should first set it back in training mode with ``model.train()``. + + Args: + pretrained_model_name_or_path: + Can be either: + + - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g., + ``bert-base-uncased``. + - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g., + ``dbmdz/bert-base-german-cased``.
+ - A path to a `directory` containing model weights saved using + :func:`~transformers.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``. + - A path or url to a `tensorflow index checkpoint file` (e.g., ``./tf_model/model.ckpt.index``). In + this case, ``from_tf`` should be set to :obj:`True` and a configuration object should be provided + as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in + a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. + model_args (additional positional arguments, `optional`): + Will be passed along to the underlying model ``__init__()`` method. + config (:class:`~transformers.PretrainedConfig`, `optional`): + Configuration for the model to use instead of an automatically loaded configuration. Configuration can + be automatically loaded when: + + - The model is a model provided by the library (loaded with the `shortcut name` string of a + pretrained model). + - The model was saved using :meth:`~transformers.PreTrainedModel.save_pretrained` and is reloaded + by supplying the save directory. + - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a + configuration JSON file named `config.json` is found in the directory. + state_dict (`Dict[str, torch.Tensor]`, `optional`): + A state dictionary to use instead of a state dictionary loaded from the saved weights file. + + This option can be used if you want to create a model from a pretrained configuration but load your own + weights. In this case though, you should check if using + :func:`~transformers.PreTrainedModel.save_pretrained` and + :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. + cache_dir (:obj:`str`, `optional`): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + from_tf (:obj:`bool`, `optional`, defaults to :obj:`False`): + Load the model weights from a TensorFlow checkpoint save file (see docstring of + ``pretrained_model_name_or_path`` argument). + force_download (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to delete incompletely received files. Will attempt to resume the download if such a + file exists. + proxies (:obj:`Dict[str, str]`, `optional`): + A dictionary of proxy servers to use by protocol or endpoint, e.g., + :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each + request. + output_loading_info (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error + messages. + local_files_only (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to only look at local files (e.g., not try downloading the model). + use_cdn (:obj:`bool`, `optional`, defaults to :obj:`True`): + Whether or not to use Cloudfront (a Content Delivery Network, or CDN) when searching for the model on + our S3 (faster). Should be set to :obj:`False` for checkpoints larger than 20GB. + kwargs (additional keyword arguments, `optional`): + Can be used to update the configuration object (after it is loaded) and initialize the model (e.g., + :obj:`output_attentions=True`).
Behaves differently depending on whether a ``config`` is provided or + automatically loaded: + + - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the + underlying model's ``__init__`` method (we assume all relevant updates to the configuration have + already been done). + - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class + initialization function (:func:`~transformers.PretrainedConfig.from_pretrained`). Each key of + ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute + with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration + attribute will be passed to the underlying model's ``__init__`` function. +""" + + class AutoModel: r""" - :class:`~transformers.AutoModel` is a generic model class - that will be instantiated as one of the base model classes of the library - when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)` - or the `AutoModel.from_config(config)` class methods. + This is a generic model class that will be instantiated as one of the base model classes of the library + when created with the :meth:`~transformers.AutoModel.from_pretrained` class method or the + :meth:`~transformers.AutoModel.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -419,13 +503,13 @@ class AutoModel: @classmethod @replace_list_option_in_docstrings(MODEL_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the base model classes of the library from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.AutoModel.from_pretrained` to load + the model weights. Args: config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - >>> model = AutoModel.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModel + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = AutoModel.from_config(config) """ for config_class, model_class in MODEL_MAPPING.items(): if isinstance(config, config_class): @@ -450,71 +536,28 @@ class AutoModel: @classmethod @replace_list_option_in_docstrings(MODEL_MAPPING) + @add_start_docstrings( + "Instantiate one of the base model classes of the library from a pretrained model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the base model classes of the library - from a pre-trained model configuration.
- - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - - resume_download: (`optional`) boolean, default False: - Do not delete incompletely recieved file. Attempt to resume the download if such a file exists. - - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. 
- - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. Examples:: - model = AutoModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = AutoModel.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModel + >>> # Download model and configuration from S3 and cache. + >>> model = AutoModel.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = AutoModel.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions True + + >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) + >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + >>> model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -535,11 +578,12 @@ class AutoModel: class AutoModelForPreTraining: r""" - :class:`~transformers.AutoModelForPreTraining` is a generic model class - that will be instantiated as one of the model classes of the library -with the architecture used for pretraining this model– when created with the `AutoModelForPreTraining.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with the + architecture used for pretraining this model---when created with the + :meth:`~transformers.AutoModelForPreTraining.from_pretrained` class method or the + :meth:`~transformers.AutoModelForPreTraining.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -552,13 +596,14 @@ class AutoModelForPreTraining: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_PRETRAINING_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with the architecture used for pretraining this + model---from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use + :meth:`~transformers.AutoModelForPreTraining.from_pretrained` to load the model weights. Args: config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - >>> model = AutoModelForPreTraining.from_config(config) # E.g.
model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModelForPreTraining + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = AutoModelForPreTraining.from_config(config) """ for config_class, model_class in MODEL_FOR_PRETRAINING_MAPPING.items(): if isinstance(config, config_class): @@ -583,64 +630,28 @@ class AutoModelForPreTraining: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_PRETRAINING_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with the architecture used for pretraining this ", + "model---from a pretrained model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the model classes of the library -with the architecture used for pretraining this model– from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. 
- In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - Examples:: - model = AutoModelForPreTraining.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = AutoModelForPreTraining.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = AutoModelForPreTraining.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = AutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModelForPreTraining + >>> # Download model and configuration from S3 and cache. + >>> model = AutoModelForPreTraining.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = AutoModelForPreTraining.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions True + + >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) + >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + >>> model = AutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -661,12 +672,19 @@ class AutoModelForPreTraining: class AutoModelWithLMHead: r""" - :class:`~transformers.AutoModelWithLMHead` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `AutoModelWithLMHead.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + language modeling head---when created with the + :meth:`~transformers.AutoModelWithLMHead.from_pretrained` class method or the + :meth:`~transformers.AutoModelWithLMHead.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). + + ..
warning:: + + This class is deprecated and will be removed in a future version. Please use + :class:`~transformers.AutoModelForCausalLM` for causal language models, + :class:`~transformers.AutoModelForMaskedLM` for masked language models and + :class:`~transformers.AutoModelForSeq2SeqLM` for encoder-decoder models. """ def __init__(self): @@ -679,13 +697,13 @@ class AutoModelWithLMHead: @classmethod @replace_list_option_in_docstrings(MODEL_WITH_LM_HEAD_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a language modeling head---from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.AutoModelWithLMHead.from_pretrained` + to load the model weights. Args: config (:class:`~transformers.PretrainedConfig`): @@ -695,11 +713,15 @@ class AutoModelWithLMHead: Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = AutoModelWithLMHead.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModelWithLMHead + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = AutoModelWithLMHead.from_config(config) """ warnings.warn( - "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.", + "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use " + "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and " + "`AutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) for config_class, model_class in MODEL_WITH_LM_HEAD_MAPPING.items(): @@ -714,68 +736,33 @@ class AutoModelWithLMHead: @classmethod @replace_list_option_in_docstrings(MODEL_WITH_LM_HEAD_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a language modeling head---from a pretrained ", + "model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. 
- - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = AutoModelWithLMHead.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = AutoModelWithLMHead.from_pretrained('./test/bert_model/') # E.g. 
model was saved using `save_pretrained('./test/saved_model/')` - model = AutoModelWithLMHead.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = AutoModelWithLMHead.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModelWithLMHead + >>> # Download model and configuration from S3 and cache. + >>> model = AutoModelWithLMHead.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = AutoModelWithLMHead.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions True + + >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) + >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + >>> model = AutoModelWithLMHead.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ warnings.warn( "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use " "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and " "`AutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) config = kwargs.pop("config", None) @@ -797,12 +784,12 @@ class AutoModelWithLMHead: class AutoModelForCausalLM: r""" - :class:`~transformers.AutoModelForCausalLM` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + causal language modeling head---when created with the + :meth:`~transformers.AutoModelForCausalLM.from_pretrained` class method or the + :meth:`~transformers.AutoModelForCausalLM.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -815,13 +802,14 @@ class AutoModelForCausalLM: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_CAUSAL_LM_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a causal language modeling head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.AutoModelForCausalLM.from_pretrained` + to load the model weights.
Args: config (:class:`~transformers.PretrainedConfig`): @@ -831,8 +819,10 @@ class AutoModelForCausalLM: Examples:: - config = GPT2Config.from_pretrained('gpt2') # Download configuration from S3 and cache. - model = AutoModelForCausalLM.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModelForCausalLM + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('gpt2') + model = AutoModelForCausalLM.from_config(config) """ for config_class, model_class in MODEL_FOR_CAUSAL_LM_MAPPING.items(): if isinstance(config, config_class): @@ -846,65 +836,28 @@ class AutoModelForCausalLM: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_CAUSAL_LM_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a causal language modeling head---from a " + "pretrained model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. 
- In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - Examples:: - model = AutoModelForCausalLM.from_pretrained('gpt2') # Download model and configuration from S3 and cache. - model = AutoModelForCausalLM.from_pretrained('./test/gpt2_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = AutoModelForCausalLM.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/gpt2_tf_model_config.json') - model = AutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModelForCausalLM + >>> # Download model and configuration from S3 and cache. + >>> model = AutoModelForCausalLM.from_pretrained('gpt2') + + >>> # Update configuration during loading + >>> model = AutoModelForCausalLM.from_pretrained('gpt2', output_attentions=True) + >>> model.config.output_attentions True + + >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) + >>> config = AutoConfig.from_json_file('./tf_model/gpt2_tf_model_config.json') + >>> model = AutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -925,12 +878,12 @@ class AutoModelForCausalLM: class AutoModelForMaskedLM: r""" - :class:`~transformers.AutoModelForMaskedLM` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `AutoModelForMaskedLM.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + masked language modeling head---when created with the + :meth:`~transformers.AutoModelForMaskedLM.from_pretrained` class method or the + :meth:`~transformers.AutoModelForMaskedLM.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error).
""" def __init__(self): @@ -943,13 +896,14 @@ class AutoModelForMaskedLM: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_MASKED_LM_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a masked language modeling head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.AutoModelForMaskedLM.from_pretrained` + to load the model weights. Args: config (:class:`~transformers.PretrainedConfig`): @@ -959,8 +913,10 @@ class AutoModelForMaskedLM: Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = AutoModelForMaskedLM.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModelForMaskedLM + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = AutoModelForMaskedLM.from_config(config) """ for config_class, model_class in MODEL_FOR_MASKED_LM_MAPPING.items(): if isinstance(config, config_class): @@ -974,65 +930,28 @@ class AutoModelForMaskedLM: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_MASKED_LM_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a masked language modeling head---from a " + "pretrained model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. 
- model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = AutoModelForMaskedLM.from_pretrained('bert') # Download model and configuration from S3 and cache. - model = AutoModelForMaskedLM.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = AutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModelForMaskedLM + >>> # Download model and configuration from S3 and cache. 
+            >>> model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
+            >>> model = AutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1053,12 +972,12 @@ class AutoModelForMaskedLM:

 class AutoModelForSeq2SeqLM:
     r"""
-    :class:`~transformers.AutoModelForSeq2SeqLM` is a generic model class
-    that will be instantiated as one of the language modeling model classes of the library
-    when created with the `AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    sequence-to-sequence language modeling head---when created with the
+    :meth:`~transformers.AutoModelForSeq2SeqLM.from_pretrained` class method or the
+    :meth:`~transformers.AutoModelForSeq2SeqLM.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -1071,13 +990,14 @@ class AutoModelForSeq2SeqLM:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with a sequence-to-sequence language modeling
+        head---from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use :meth:`~transformers.AutoModelForSeq2SeqLM.from_pretrained`
+            to load the model weights.

         Args:
             config (:class:`~transformers.PretrainedConfig`):
@@ -1087,8 +1007,10 @@ class AutoModelForSeq2SeqLM:

         Examples::

-            config = T5Config.from_pretrained('t5')
-            model = AutoModelForSeq2SeqLM.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, AutoModelForSeq2SeqLM
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('t5-base')
+            model = AutoModelForSeq2SeqLM.from_config(config)
         """
         for config_class, model_class in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items():
             if isinstance(config, config_class):
@@ -1104,65 +1026,28 @@ class AutoModelForSeq2SeqLM:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a sequence-to-sequence language modeling "
+        "head---from a pretrained model.",
+        AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the language modeling model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Args:
-            pretrained_model_name_or_path:
-                Either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-            state_dict: (`optional`) dict:
-                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
-            cache_dir: (`optional`) string:
-                Path to a directory in which a downloaded pre-trained model
-                configuration should be cached if the standard cache should not be used.
-            force_download: (`optional`) boolean, default False:
-                Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-            resume_download: (`optional`) boolean, default False:
-                Do not delete incompletely received file. Attempt to resume the download if such a file exists.
-            proxies: (`optional`) dict, default None:
-                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-                The proxies are used on each request.
-            output_loading_info: (`optional`) boolean:
-                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
-            kwargs: (`optional`) Remaining dictionary of keyword arguments:
-                These arguments will be passed to the configuration and the model.
-
+        r"""
         Examples::

-            model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')    # Download model and configuration from S3 and cache.
-            model = AutoModelForSeq2SeqLM.from_pretrained('./test/t5_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = AutoModelForSeq2SeqLM.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/t5_tf_model_config.json')
-            model = AutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+            >>> from transformers import AutoConfig, AutoModelForSeq2SeqLM
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/t5_tf_model_config.json')
+            >>> model = AutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1185,12 +1070,12 @@ class AutoModelForSeq2SeqLM:

 class AutoModelForSequenceClassification:
     r"""
-    :class:`~transformers.AutoModelForSequenceClassification` is a generic model class
-    that will be instantiated as one of the sequence classification model classes of the library
-    when created with the `AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    sequence classification head---when created with the
+    :meth:`~transformers.AutoModelForSequenceClassification.from_pretrained` class method or the
+    :meth:`~transformers.AutoModelForSequenceClassification.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -1203,13 +1088,14 @@ class AutoModelForSequenceClassification:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with a sequence classification head---from a
+        configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use
+            :meth:`~transformers.AutoModelForSequenceClassification.from_pretrained` to load the model weights.
         Args:
             config (:class:`~transformers.PretrainedConfig`):
@@ -1219,8 +1105,10 @@ class AutoModelForSequenceClassification:

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
-            model = AutoModelForSequenceClassification.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, AutoModelForSequenceClassification
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = AutoModelForSequenceClassification.from_config(config)
         """
         for config_class, model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.items():
             if isinstance(config, config_class):
@@ -1236,72 +1124,28 @@ class AutoModelForSequenceClassification:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a sequence classification head---from a "
+        "pretrained model.",
+        AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the sequence classification model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Args:
-            pretrained_model_name_or_path: either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaining positional arguments will be passed to the underlying model's ``__init__`` method
-
-            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-
-            state_dict: (`optional`) dict:
-                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
-
-            cache_dir: (`optional`) string:
-                Path to a directory in which a downloaded pre-trained model
-                configuration should be cached if the standard cache should not be used.
-
-            force_download: (`optional`) boolean, default False:
-                Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-            resume_download: (`optional`) boolean, default False:
-                Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
-
-            proxies: (`optional`) dict, default None:
-                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-                The proxies are used on each request.
-
-            output_loading_info: (`optional`) boolean:
-                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
-
-            kwargs: (`optional`) Remaining dictionary of keyword arguments:
-                These arguments will be passed to the configuration and the model.
-
+        r"""
         Examples::

-            model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = AutoModelForSequenceClassification.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = AutoModelForSequenceClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+            >>> from transformers import AutoConfig, AutoModelForSequenceClassification
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
+            >>> model = AutoModelForSequenceClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1324,12 +1168,12 @@ class AutoModelForSequenceClassification:

 class AutoModelForQuestionAnswering:
     r"""
-    :class:`~transformers.AutoModelForQuestionAnswering` is a generic model class
-    that will be instantiated as one of the question answering model classes of the library
-    when created with the `AutoModelForQuestionAnswering.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    question answering head---when created with the
+    :meth:`~transformers.AutoModelForQuestionAnswering.from_pretrained` class method or the
+    :meth:`~transformers.AutoModelForQuestionAnswering.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -1342,13 +1186,13 @@ class AutoModelForQuestionAnswering:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_QUESTION_ANSWERING_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with a question answering head---from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use
+            :meth:`~transformers.AutoModelForQuestionAnswering.from_pretrained` to load the model weights.

         Args:
             config (:class:`~transformers.PretrainedConfig`):
@@ -1358,8 +1202,10 @@ class AutoModelForQuestionAnswering:

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
-            model = AutoModelForQuestionAnswering.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, AutoModelForQuestionAnswering
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = AutoModelForQuestionAnswering.from_config(config)
         """
         for config_class, model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.items():
             if isinstance(config, config_class):
@@ -1376,69 +1222,28 @@ class AutoModelForQuestionAnswering:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_QUESTION_ANSWERING_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a question answering head---from a "
+        "pretrained model.",
+        AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the question answering model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Args:
-            pretrained_model_name_or_path: either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-
-            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-
-            state_dict: (`optional`) dict:
-                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
-
-            cache_dir: (`optional`) string:
-                Path to a directory in which a downloaded pre-trained model
-                configuration should be cached if the standard cache should not be used.
-
-            force_download: (`optional`) boolean, default False:
-                Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-            proxies: (`optional`) dict, default None:
-                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-                The proxies are used on each request.
-
-            output_loading_info: (`optional`) boolean:
-                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
-
-            kwargs: (`optional`) Remaining dictionary of keyword arguments:
-                These arguments will be passed to the configuration and the model.
-
+        r"""
         Examples::

-            model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = AutoModelForQuestionAnswering.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+            >>> from transformers import AutoConfig, AutoModelForQuestionAnswering
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
+            >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1462,12 +1267,12 @@ class AutoModelForTokenClassification:

 class AutoModelForTokenClassification:
     r"""
-    :class:`~transformers.AutoModelForTokenClassification` is a generic model class
-    that will be instantiated as one of the token classification model classes of the library
-    when created with the `AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    token classification head---when created with the
+    :meth:`~transformers.AutoModelForTokenClassification.from_pretrained` class method or the
+    :meth:`~transformers.AutoModelForTokenClassification.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -1480,13 +1285,13 @@ class AutoModelForTokenClassification:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with a token classification head---from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use
+            :meth:`~transformers.AutoModelForTokenClassification.from_pretrained` to load the model weights.

         Args:
             config (:class:`~transformers.PretrainedConfig`):
@@ -1496,8 +1301,10 @@ class AutoModelForTokenClassification:

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
-            model = AutoModelForTokenClassification.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, AutoModelForTokenClassification
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = AutoModelForTokenClassification.from_config(config)
         """
         for config_class, model_class in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.items():
             if isinstance(config, config_class):
@@ -1514,69 +1321,28 @@ class AutoModelForTokenClassification:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a token classification head---from a "
+        "pretrained model.",
+        AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the question answering model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Args:
-            pretrained_model_name_or_path:
-                Either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-
-            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-
-            state_dict: (`optional`) dict:
-                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
-
-            cache_dir: (`optional`) string:
-                Path to a directory in which a downloaded pre-trained model
-                configuration should be cached if the standard cache should not be used.
-
-            force_download: (`optional`) boolean, default False:
-                Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-            proxies: (`optional`) dict, default None:
-                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-                The proxies are used on each request.
-
-            output_loading_info: (`optional`) boolean:
-                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
-
-            kwargs: (`optional`) Remaining dictionary of keyword arguments:
-                These arguments will be passed to the configuration and the model.
-
+        r"""
         Examples::

-            model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = AutoModelForTokenClassification.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = AutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+            >>> from transformers import AutoConfig, AutoModelForTokenClassification
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
+            >>> model = AutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1600,12 +1366,12 @@ class AutoModelForTokenClassification:

 class AutoModelForMultipleChoice:
     r"""
-    :class:`~transformers.AutoModelForMultipleChoice` is a generic model class
-    that will be instantiated as one of the multiple choice model classes of the library
-    when created with the `AutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    multiple choice classification head---when created with the
+    :meth:`~transformers.AutoModelForMultipleChoice.from_pretrained` class method or the
+    :meth:`~transformers.AutoModelForMultipleChoice.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -1618,13 +1384,14 @@ class AutoModelForMultipleChoice:
     @classmethod
     @replace_list_option_in_docstrings(MODEL_FOR_MULTIPLE_CHOICE_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+ r""" + Instantiates one of the model classes of the library---with a multiple choice classification head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use + :meth:`~transformers.AutoModelForMultipleChoice.from_pretrained` to load the model weights. Args: config (:class:`~transformers.PretrainedConfig`): @@ -1634,8 +1401,10 @@ class AutoModelForMultipleChoice: Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = AutoModelForMultipleChoice.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, AutoModelForMultipleChoice + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = AutoModelForMultipleChoice.from_config(config) """ for config_class, model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items(): if isinstance(config, config_class): @@ -1652,69 +1421,28 @@ class AutoModelForMultipleChoice: @classmethod @replace_list_option_in_docstrings(MODEL_FOR_MULTIPLE_CHOICE_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a multiple choice classification head---from a " + "pretrained model.", + AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the question answering model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. 
Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option. - - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = AutoModelForForMultipleChoice.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = AutoModelForMultipleChoice.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = AutoModelForMultipleChoice.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, AutoModelForMultipleChoice + >>> # Download model and configuration from S3 and cache. 
+            >>> model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
+            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
+            >>> model = AutoModelForMultipleChoice.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
diff --git a/src/transformers/modeling_tf_auto.py b/src/transformers/modeling_tf_auto.py
index c2314c16d..e18ed2712 100644
--- a/src/transformers/modeling_tf_auto.py
+++ b/src/transformers/modeling_tf_auto.py
@@ -41,6 +41,7 @@ from .configuration_auto import (
     replace_list_option_in_docstrings,
 )
 from .configuration_utils import PretrainedConfig
+from .file_utils import add_start_docstrings
 from .modeling_tf_albert import (
     TFAlbertForMaskedLM,
     TFAlbertForMultipleChoice,
@@ -327,14 +328,98 @@ TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
 )


+TF_AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
+
+    The model class to instantiate is selected based on the :obj:`model_type` property of the config object
+    (either passed as an argument or loaded from :obj:`pretrained_model_name_or_path` if possible), or when it's
+    missing, by falling back to using pattern matching on :obj:`pretrained_model_name_or_path`:
+
+    List options
+
+    The model is set in evaluation mode by default using ``model.eval()`` (so for instance, dropout modules are
+    deactivated). To train the model, you should first set it back in training mode with ``model.train()``
+
+    Args:
+        pretrained_model_name_or_path:
+            Can be either:
+
+                - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g.,
+                  ``bert-base-uncased``.
+                - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g.,
+                  ``dbmdz/bert-base-german-cased``.
+                - A path to a `directory` containing model weights saved using
+                  :func:`~transformers.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``.
+                - A path or url to a `PyTorch state_dict save file` (e.g., ``./pt_model/pytorch_model.bin``). In
+                  this case, ``from_pt`` should be set to :obj:`True` and a configuration object should be provided
+                  as ``config`` argument. This loading path is slower than converting the PyTorch model in a
+                  TensorFlow model using the provided conversion scripts and loading the TensorFlow model
+                  afterwards.
+        model_args (additional positional arguments, `optional`):
+            Will be passed along to the underlying model ``__init__()`` method.
+        config (:class:`~transformers.PretrainedConfig`, `optional`):
+            Configuration for the model to use instead of an automatically loaded configuration. Configuration can
+            be automatically loaded when:
+
+                - The model is a model provided by the library (loaded with the `shortcut name` string of a
+                  pretrained model).
+                - The model was saved using :meth:`~transformers.PreTrainedModel.save_pretrained` and is reloaded
+                  by supplying the save directory.
+                - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
+                  configuration JSON file named `config.json` is found in the directory.
+        state_dict (`Dict[str, torch.Tensor]`, `optional`):
+            A state dictionary to use instead of a state dictionary loaded from saved weights file.
+
+            This option can be used if you want to create a model from a pretrained configuration but load your own
+            weights. In this case though, you should check if using
+            :func:`~transformers.PreTrainedModel.save_pretrained` and
+            :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
+        cache_dir (:obj:`str`, `optional`):
+            Path to a directory in which a downloaded pretrained model configuration should be cached if the
+            standard cache should not be used.
+        from_pt (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Load the model weights from a PyTorch state_dict save file (see docstring of
+            ``pretrained_model_name_or_path`` argument).
+        force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to force the (re-)download of the model weights and configuration files, overriding the
+            cached versions if they exist.
+        resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to delete incompletely received files. Will attempt to resume the download if such a
+            file exists.
+        proxies (:obj:`Dict[str, str]`, `optional`):
+            A dictionary of proxy servers to use by protocol or endpoint, e.g.,
+            :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
+            request.
+        output_loading_info (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to also return a dictionary containing missing keys, unexpected keys and error
+            messages.
+        local_files_only (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to only look at local files (e.g., not try downloading the model).
+        use_cdn (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Whether or not to use Cloudfront (a Content Delivery Network, or CDN) when searching for the model on
+            our S3 (faster). Should be set to :obj:`False` for checkpoints larger than 20GB.
+        kwargs (additional keyword arguments, `optional`):
+            Can be used to update the configuration object (after it has been loaded) and initialize the model
+            (e.g., :obj:`output_attentions=True`). Behaves differently depending on whether a ``config`` is
+            provided or automatically loaded:
+
+                - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the
+                  underlying model's ``__init__`` method (we assume all relevant updates to the configuration have
+                  already been done).
+                - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class
+                  initialization function (:func:`~transformers.PretrainedConfig.from_pretrained`). Each key of
+                  ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute
+                  with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration
+                  attribute will be passed to the underlying model's ``__init__`` function.
+"""
+
+
 class TFAutoModel(object):
     r"""
-    :class:`~transformers.TFAutoModel` is a generic model class
-    that will be instantiated as one of the base model classes of the library
-    when created with the `TFAutoModel.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the base model classes of the library
+    when created with the :meth:`~transformers.TFAutoModel.from_pretrained` class method or the
+    :meth:`~transformers.TFAutoModel.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -347,24 +432,26 @@ class TFAutoModel(object):
     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the base model classes of the library from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use :meth:`~transformers.TFAutoModel.from_pretrained` to load
+            the model weights.

         Args:
-            config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`:
+            config (:class:`~transformers.PretrainedConfig`):
                 The model class to instantiate is selected based on the configuration class:

                 List options

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
-            model = TFAutoModel.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, TFAutoModel
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = TFAutoModel.from_config(config)
         """
         for config_class, model_class in TF_MODEL_MAPPING.items():
             if isinstance(config, config_class):
@@ -378,75 +465,28 @@ class TFAutoModel(object):
     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the base model classes of the library from a pretrained model.",
+        TF_AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the base model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        Params:
-            pretrained_model_name_or_path: either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument.
-
-            from_pt: (`Optional`) Boolean
-                Set to True if the Checkpoint is a PyTorch checkpoint.
-
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-
-            config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-
-            state_dict: (`optional`) dict:
-                an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option.
-
-            cache_dir: (`optional`) string:
-                Path to a directory in which a downloaded pre-trained model
-                configuration should be cached if the standard cache should not be used.
-
-            force_download: (`optional`) boolean, default False:
-                Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-            resume_download: (`optional`) boolean, default False:
-                Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
-
-            proxies: (`optional`) dict, default None:
-                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-                The proxies are used on each request.
-
-            output_loading_info: (`optional`) boolean:
-                Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages.
-
-            kwargs: (`optional`) Remaining dictionary of keyword arguments:
-                Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded:
-
-                - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
-                - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.
+        r"""
         Examples::

-            model = TFAutoModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = TFAutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = TFAutoModel.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = TFAutoModel.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
+            >>> from transformers import AutoConfig, TFAutoModel
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = TFAutoModel.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = TFAutoModel.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower)
+            >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json')
+            >>> model = TFAutoModel.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -467,11 +507,12 @@ class TFAutoModel(object):

 class TFAutoModelForPreTraining(object):
     r"""
-    :class:`~transformers.TFAutoModelForPreTraining` is a generic model class
-    that will be instantiated as one of the model classes of the library -with the architecture used for pretraining this model– when created with the `TFAutoModelForPreTraining.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    This is a generic model class that will be instantiated as one of the model classes of the library---with the
+    architecture used for pretraining this model---when created with the
+    :meth:`~transformers.TFAutoModelForPreTraining.from_pretrained` class method or the
+    :meth:`~transformers.TFAutoModelForPreTraining.from_config` class method.

-    This class cannot be instantiated using `__init__()` (throws an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -484,24 +525,27 @@ class TFAutoModelForPreTraining(object):
     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_FOR_PRETRAINING_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with the architecture used for pretraining this
+        model---from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use
+            :meth:`~transformers.TFAutoModelForPreTraining.from_pretrained` to load the model weights.

         Args:
-            config (:class:`~transformers.TFPretrainedConfig`):
+            config (:class:`~transformers.PretrainedConfig`):
                 The model class to instantiate is selected based on the configuration class:

                 List options

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
-            model = TFAutoModelForPreTraining.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, TFAutoModelForPreTraining
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = TFAutoModelForPreTraining.from_config(config)
         """
         for config_class, model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.items():
             if isinstance(config, config_class):
@@ -515,75 +559,28 @@ class TFAutoModelForPreTraining(object):
     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_FOR_PRETRAINING_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with the architecture used for pretraining this "
+        "model---from a pretrained model.",
+        TF_AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the model classes of the library -with the architecture used for pretraining this model– from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Args:
-            pretrained_model_name_or_path:
-                Either:
-
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-                - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-            model_args: (`optional`) Sequence of positional arguments:
-                All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-            config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`:
-                Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-                - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-                - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-            state_dict: (`optional`) dict:
-                an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file.
-                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-                In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option.
- cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - Can be used to update the configuration object (after it being loaded) and initiate the model. - (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or - automatically loaded: - - - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the - underlying model's ``__init__`` method (we assume all relevant updates to the configuration have - already been done) - - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class - initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of - ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute - with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration - attribute will be passed to the underlying model's ``__init__`` function. - + r""" Examples:: - model = TFAutoModelForPreTraining.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = TFAutoModelForPreTraining.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelForPreTraining.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = TFAutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelForPreTraining + >>> # Download model and configuration from S3 and cache. 
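+ >>> # (A local directory saved with `save_pretrained('./test/saved_model/')`
+ >>> # could be passed below instead of the shortcut name; the path here is
+ >>> # only an illustration.)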
+ >>> model = TFAutoModelForPreTraining.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = TFAutoModelForPreTraining.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json') + >>> model = TFAutoModelForPreTraining.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -604,12 +601,19 @@ class TFAutoModelForPreTraining(object): class TFAutoModelWithLMHead(object): r""" - :class:`~transformers.TFAutoModelWithLMHead` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `TFAutoModelWithLMHead.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + language modeling head---when created with the + :meth:`~transformers.TFAutoModelWithLMHead.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelWithLMHead.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). + + .. warning:: + + This class is deprecated and will be removed in a future version. Please use + :class:`~transformers.TFAutoModelForCausalLM` for causal language models, + :class:`~transformers.TFAutoModelForMaskedLM` for masked language models and + :class:`~transformers.TFAutoModelForSeq2SeqLM` for encoder-decoder models. """ def __init__(self): @@ -622,27 +626,31 @@ class TFAutoModelWithLMHead(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_WITH_LM_HEAD_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a language modeling head---from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.TFAutoModelWithLMHead.from_pretrained` + to load the model weights. Args: - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = TFAutoModelWithLMHead.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelWithLMHead + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = TFAutoModelWithLMHead.from_config(config) """ warnings.warn( - "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version.
Please use `TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", + "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use " + "`TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models " + "and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) for config_class, model_class in TF_MODEL_WITH_LM_HEAD_MAPPING.items(): @@ -657,78 +665,33 @@ class TFAutoModelWithLMHead(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_WITH_LM_HEAD_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a language modeling head---from a pretrained ", + "model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - Params: - pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument. - - from_pt: (`Optional`) Boolean - Set to True if the Checkpoint is a PyTorch checkpoint. - - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. 
- - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - - resume_download: (`optional`) boolean, default False: - Do not delete incompletely recieved file. Attempt to resume the download if such a file exists. - - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages. - - kwargs: (`optional`) Remaining dictionary of keyword arguments: - Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded: - - - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done) - - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function. - + r""" Examples:: - model = TFAutoModelWithLMHead.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = TFAutoModelWithLMHead.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelWithLMHead.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = TFAutoModelWithLMHead.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelWithLMHead + >>> # Download model and configuration from S3 and cache. + >>> model = TFAutoModelWithLMHead.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = TFAutoModelWithLMHead.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json') + >>> model = TFAutoModelWithLMHead.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ warnings.warn( - "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. 
Please use `TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", + "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use " + "`TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models " + "and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) config = kwargs.pop("config", None) @@ -750,159 +713,14 @@ class TFAutoModelWithLMHead(object): ) -class TFAutoModelForMultipleChoice: - r""" - :class:`~transformers.TFAutoModelForMultipleChoice` is a generic model class - that will be instantiated as one of the multiple choice model classes of the library - when created with the `TFAutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)` - class method. - - This class cannot be instantiated using `__init__()` (throws an error). - """ - - def __init__(self): - raise EnvironmentError( - "TFAutoModelForMultipleChoice is designed to be instantiated " - "using the `TFAutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)` or " - "`TFAutoModelForMultipleChoice.from_config(config)` methods." - ) - - @classmethod - @replace_list_option_in_docstrings(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING, use_model_types=False) - def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. - - Note: - Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights - - Args: - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - The model class to instantiate is selected based on the configuration class: - - List options - - Examples:: - - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = TFAutoModelForMulitpleChoice.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` - """ - for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items(): - if isinstance(config, config_class): - return model_class(config) - raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) - ) - - @classmethod - @replace_list_option_in_docstrings(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING) - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the multiple choice model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Params: - pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. 
- - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument. - - from_pt: (`Optional`) Boolean - Set to True if the Checkpoint is a PyTorch checkpoint. - - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. - - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - - resume_download: (`optional`) boolean, default False: - Do not delete incompletely recieved file. Attempt to resume the download if such a file exists. - - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages. - - kwargs: (`optional`) Remaining dictionary of keyword arguments: - Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded: - - - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done) - - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). 
Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function. - - Examples:: - - model = TFAutoModelFormultipleChoice.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = TFAutoModelFormultipleChoice.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelFormultipleChoice.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = TFAutoModelFormultipleChoice.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) - - """ - config = kwargs.pop("config", None) - if not isinstance(config, PretrainedConfig): - config, kwargs = AutoConfig.from_pretrained( - pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs - ) - - for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items(): - if isinstance(config, config_class): - return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs) - raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) - ) - - class TFAutoModelForCausalLM: r""" - :class:`~transformers.TFAutoModelForCausalLM` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `TFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + causal language modeling head---when created with the + :meth:`~transformers.TFAutoModelForCausalLM.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelForCausalLM.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -915,24 +733,27 @@ class TFAutoModelForCausalLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_CAUSAL_LM_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a causal language modeling head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.TFAutoModelForCausalLM.from_pretrained` + to load the model weights.
Args: - config (:class:`~transformers.TFPretrainedConfig`): + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = GPT2Config.from_pretrained('gpt2') # Download configuration from S3 and cache. - model = TFAutoModelForCausalLM.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelForCausalLM + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('gpt2') + model = TFAutoModelForCausalLM.from_config(config) """ for config_class, model_class in TF_MODEL_FOR_CAUSAL_LM_MAPPING.items(): if isinstance(config, config_class): @@ -946,65 +767,28 @@ class TFAutoModelForCausalLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_CAUSAL_LM_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a causal language modeling head---from a " + "pretrained model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. 
- - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = TFAutoModelForCausalLM.from_pretrained('gpt2') # Download model and configuration from S3 and cache. - model = TFAutoModelForCausalLM.from_pretrained('./test/gpt2_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelForCausalLM.from_pretrained('gpt2', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/gpt2_tf_model_config.json') - model = TFAutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelForCausalLM + >>> # Download model and configuration from S3 and cache. + >>> model = TFAutoModelForCausalLM.from_pretrained('gpt2') + + >>> # Update configuration during loading + >>> model = TFAutoModelForCausalLM.from_pretrained('gpt2', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/gpt2_pt_model_config.json') + >>> model = TFAutoModelForCausalLM.from_pretrained('./pt_model/gpt2_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -1025,12 +809,12 @@ class TFAutoModelForCausalLM: class TFAutoModelForMaskedLM: r""" - :class:`~transformers.TFAutoModelForMaskedLM` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `TFAutoModelForMaskedLM.from_pretrained(pretrained_model_name_or_path)` - class method. 
+ This is a generic model class that will be instantiated as one of the model classes of the library---with a + masked language modeling head---when created with the + :meth:`~transformers.TFAutoModelForMaskedLM.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelForMaskedLM.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -1043,24 +827,27 @@ class TFAutoModelForMaskedLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_MASKED_LM_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a masked language modeling head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use :meth:`~transformers.TFAutoModelForMaskedLM.from_pretrained` + to load the model weights. Args: - config (:class:`~transformers.TFPretrainedConfig`): + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = TFAutoModelForMaskedLM.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelForMaskedLM + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = TFAutoModelForMaskedLM.from_config(config) """ for config_class, model_class in TF_MODEL_FOR_MASKED_LM_MAPPING.items(): if isinstance(config, config_class): @@ -1074,65 +861,28 @@ class TFAutoModelForMaskedLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_MASKED_LM_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a masked language modeling head---from a " + "pretrained model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
- - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = TFAutoModelForMaskedLM.from_pretrained(('bert-base-uncased') # Download model and configuration from S3 and cache. - model = TFAutoModelForMaskedLM.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelForMaskedLM.from_pretrained(('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = TFAutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelForMaskedLM + >>> # Download model and configuration from S3 and cache. 
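+ >>> # (As above, a local `save_pretrained` directory such as
+ >>> # './test/saved_model/' could equally be passed below; the path is only
+ >>> # an illustration.)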
+ >>> model = TFAutoModelForMaskedLM.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = TFAutoModelForMaskedLM.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json') + >>> model = TFAutoModelForMaskedLM.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -1153,12 +903,12 @@ class TFAutoModelForMaskedLM: class TFAutoModelForSeq2SeqLM: r""" - :class:`~transformers.TFAutoModelForSeq2SeqLM` is a generic model class - that will be instantiated as one of the language modeling model classes of the library - when created with the `TFAutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + sequence-to-sequence language modeling head---when created with the + :meth:`~transformers.TFAutoModelForSeq2SeqLM.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelForSeq2SeqLM.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -1171,24 +921,27 @@ class TFAutoModelForSeq2SeqLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a sequence-to-sequence language modeling + head---from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use + :meth:`~transformers.TFAutoModelForSeq2SeqLM.from_pretrained` to load the model weights. Args: - config (:class:`~transformers.TFPretrainedConfig`): + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = T5Config.from_pretrained('t5') - model = TFAutoModelForSeq2SeqLM.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelForSeq2SeqLM + # Download configuration from S3 and cache.
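+ # (Reminder: from_config only builds the T5 architecture from the
+ # configuration; use `TFAutoModelForSeq2SeqLM.from_pretrained('t5-base')` if
+ # the pretrained weights are needed as well.)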
+ config = AutoConfig.from_pretrained('t5-base') + model = TFAutoModelForSeq2SeqLM.from_config(config) """ for config_class, model_class in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items(): if isinstance(config, config_class): @@ -1204,65 +957,28 @@ class TFAutoModelForSeq2SeqLM: @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, use_model_types=False) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a sequence-to-sequence language modeling " + "head---from a pretrained model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the language modeling model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Args: - pretrained_model_name_or_path: - Either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option.
- cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - resume_download: (`optional`) boolean, default False: - Do not delete incompletely received file. Attempt to resume the download if such a file exists. - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages. - kwargs: (`optional`) Remaining dictionary of keyword arguments: - These arguments will be passed to the configuration and the model. - + r""" Examples:: - model = TFAutoModelForSeq2SeqLM.from_pretrained('t5-base') # Download model and configuration from S3 and cache. - model = TFAutoModelForSeq2SeqLM.from_pretrained('./test/t5_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelForSeq2SeqLM.from_pretrained('t5-base', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/t5_tf_model_config.json') - model = TFAutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelForSeq2SeqLM + >>> # Download model and configuration from S3 and cache. + >>> model = TFAutoModelForSeq2SeqLM.from_pretrained('t5-base') + + >>> # Update configuration during loading + >>> model = TFAutoModelForSeq2SeqLM.from_pretrained('t5-base', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/t5_pt_model_config.json') + >>> model = TFAutoModelForSeq2SeqLM.from_pretrained('./pt_model/t5_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -1285,12 +1001,12 @@ class TFAutoModelForSeq2SeqLM: class TFAutoModelForSequenceClassification(object): r""" - :class:`~transformers.TFAutoModelForSequenceClassification` is a generic model class - that will be instantiated as one of the sequence classification model classes of the library - when created with the `TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + sequence classification head---when created with the + :meth:`~transformers.TFAutoModelForSequenceClassification.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelForSequenceClassification.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error).
""" def __init__(self): @@ -1303,24 +1019,27 @@ class TFAutoModelForSequenceClassification(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a sequence classification head---from a + configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use + :meth:`~transformers.TFAutoModelForSequenceClassification.from_pretrained` to load the model weights. Args: - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = TFAutoModelForSequenceClassification.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelForSequenceClassification + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = TFAutoModelForSequenceClassification.from_config(config) """ for config_class, model_class in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.items(): if isinstance(config, config_class): @@ -1336,78 +1055,28 @@ class TFAutoModelForSequenceClassification(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a sequence classification head---from a " + "pretrained model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the sequence classification model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Params: - pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument. - - from_pt: (`Optional`) Boolean - Set to True if the Checkpoint is a PyTorch checkpoint. 
- - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. - - cache_dir: (`optional`) string: - Path to a directory in which a downloaded pre-trained model - configuration should be cached if the standard cache should not be used. - - force_download: (`optional`) boolean, default False: - Force to (re-)download the model weights and configuration files and override the cached versions if they exists. - - resume_download: (`optional`) boolean, default False: - Do not delete incompletely recieved file. Attempt to resume the download if such a file exists. - - proxies: (`optional`) dict, default None: - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. - The proxies are used on each request. - - output_loading_info: (`optional`) boolean: - Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages. - - kwargs: (`optional`) Remaining dictionary of keyword arguments: - Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded: - - - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done) - - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function. - + r""" Examples:: - model = TFAutoModelForSequenceClassification.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - model = TFAutoModelForSequenceClassification.from_pretrained('./test/bert_model/') # E.g. 
model was saved using `save_pretrained('./test/saved_model/')` - model = TFAutoModelForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading - assert model.config.output_attentions == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = TFAutoModelForSequenceClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) + >>> from transformers import AutoConfig, TFAutoModelForSequenceClassification + >>> # Download model and configuration from S3 and cache. + >>> model = TFAutoModelForSequenceClassification.from_pretrained('bert-base-uncased') + + >>> # Update configuration during loading + >>> model = TFAutoModelForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True) + >>> model.config.output_attentions + True + + >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower) + >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json') + >>> model = TFAutoModelForSequenceClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) if not isinstance(config, PretrainedConfig): @@ -1430,12 +1099,12 @@ class TFAutoModelForSequenceClassification(object): class TFAutoModelForQuestionAnswering(object): r""" - :class:`~transformers.TFAutoModelForQuestionAnswering` is a generic model class - that will be instantiated as one of the question answering model classes of the library - when created with the `TFAutoModelForQuestionAnswering.from_pretrained(pretrained_model_name_or_path)` - class method. + This is a generic model class that will be instantiated as one of the model classes of the library---with a + question answering head---when created with the + :meth:`~transformers.TFAutoModelForQuestionAnswering.from_pretrained` class method or the + :meth:`~transformers.TFAutoModelForQuestionAnswering.from_config` class method. - This class cannot be instantiated using `__init__()` (throws an error). + This class cannot be instantiated directly using ``__init__()`` (throws an error). """ def __init__(self): @@ -1448,24 +1117,26 @@ class TFAutoModelForQuestionAnswering(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING, use_model_types=False) def from_config(cls, config): - r"""Instantiates one of the base model classes of the library - from a configuration. + r""" + Instantiates one of the model classes of the library---with a question answering head---from a configuration. Note: Loading a model from its configuration file does **not** load the model weights. - It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load - the model weights + It only affects the model's configuration. Use + :meth:`~transformers.TFAutoModelForQuestionAnswering.from_pretrained` to load the model weights. Args: - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: + config (:class:`~transformers.PretrainedConfig`): The model class to instantiate is selected based on the configuration class: List options Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - model = TFAutoModelForQuestionAnswering.from_config(config) # E.g.
model was saved using `save_pretrained('./test/saved_model/')` + from transformers import AutoConfig, TFAutoModelForQuestionAnswering + # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('bert-base-uncased') + model = TFAutoModelForQuestionAnswering.from_config(config) """ for config_class, model_class in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.items(): if isinstance(config, config_class): @@ -1481,78 +1152,28 @@ class TFAutoModelForQuestionAnswering(object): @classmethod @replace_list_option_in_docstrings(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING) + @add_start_docstrings( + "Instantiate one of the model classes of the library---with a question answering head---from a " + "pretrained model.", + TF_AUTO_MODEL_PRETRAINED_DOCSTRING, + ) def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - r"""Instantiates one of the question answering model classes of the library - from a pre-trained model configuration. - - The `from_pretrained()` method takes care of returning the correct model class instance - based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string: - - List options - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` - - Params: - pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument. - - from_pt: (`Optional`) Boolean - Set to True if the Checkpoint is a PyTorch checkpoint. - - model_args: (`optional`) Sequence of positional arguments: - All remaning positional arguments will be passed to the underlying model's ``__init__`` method - - config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`: - Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. - - state_dict: (`optional`) dict: - an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file. - This option can be used if you want to create a model from a pretrained configuration but load your own weights. - In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option. 
-
-        cache_dir: (`optional`) string:
-            Path to a directory in which a downloaded pre-trained model
-            configuration should be cached if the standard cache should not be used.
-
-        force_download: (`optional`) boolean, default False:
-            Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-        resume_download: (`optional`) boolean, default False:
-            Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
-
-        proxies: (`optional`) dict, default None:
-            A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-            The proxies are used on each request.
-
-        output_loading_info: (`optional`) boolean:
-            Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages.
-
-        kwargs: (`optional`) Remaining dictionary of keyword arguments:
-            Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded:
-
-            - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
-            - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.
-
+        r"""
         Examples::

-            model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = TFAutoModelForQuestionAnswering.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = TFAutoModelForQuestionAnswering.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
+            >>> from transformers import AutoConfig, TFAutoModelForQuestionAnswering
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower)
+            >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json')
+            >>> model = TFAutoModelForQuestionAnswering.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1574,6 +1195,15 @@


 class TFAutoModelForTokenClassification:
+    r"""
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    token classification head---when created with the
+    :meth:`~transformers.TFAutoModelForTokenClassification.from_pretrained` class method or the
+    :meth:`~transformers.TFAutoModelForTokenClassification.from_config` class method.
+
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
+    """
+
     def __init__(self):
         raise EnvironmentError(
             "TFAutoModelForTokenClassification is designed to be instantiated "
@@ -1584,24 +1214,26 @@ class TFAutoModelForTokenClassification:
     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, use_model_types=False)
     def from_config(cls, config):
-        r"""Instantiates one of the base model classes of the library
-        from a configuration.
+        r"""
+        Instantiates one of the model classes of the library---with a token classification head---from a configuration.

         Note:
             Loading a model from its configuration file does **not** load the model weights.
-            It only affects the model's configuration. Use :func:`~transformers.TFAutoModel.from_pretrained` to load
-            the model weights
+            It only affects the model's configuration. Use
+            :meth:`~transformers.TFAutoModelForTokenClassification.from_pretrained` to load the model weights.

         Args:
-            config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`:
+            config (:class:`~transformers.PretrainedConfig`):
                 The model class to instantiate is selected based on the configuration class:

                 List options

         Examples::

-            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
-            model = TFAutoModelForTokenClassification.from_config(config)  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+            from transformers import AutoConfig, TFAutoModelForTokenClassification
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = TFAutoModelForTokenClassification.from_config(config)
         """
         for config_class, model_class in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.items():
             if isinstance(config, config_class):
@@ -1617,71 +1249,28 @@ class TFAutoModelForTokenClassification:

     @classmethod
     @replace_list_option_in_docstrings(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a token classification head---from a "
+        "pretrained model.",
+        TF_AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        r"""Instantiates one of the question answering model classes of the library
-        from a pre-trained model configuration.
-
-        The `from_pretrained()` method takes care of returning the correct model class instance
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
-
-        List options
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
-        To train the model, you should first set it back in training mode with `model.train()`
-
-        Params:
-            pretrained_model_name_or_path: either:
-
-            - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
-            - a path to a `directory` containing model weights saved using :func:`~transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
-            - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
-
-        model_args: (`optional`) Sequence of positional arguments:
-            All remaning positional arguments will be passed to the underlying model's ``__init__`` method
-
-        config: (`optional`) instance of a class derived from :class:`~transformers.TFPretrainedConfig`:
-            Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
-
-            - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
-            - the model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
-            - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
-
-        state_dict: (`optional`) dict:
-            an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file.
-            This option can be used if you want to create a model from a pretrained configuration but load your own weights.
-            In this case though, you should check if using :func:`~transformers.TFPreTrainedModel.save_pretrained` and :func:`~transformers.TFPreTrainedModel.from_pretrained` is not a simpler option.
-
-        cache_dir: (`optional`) string:
-            Path to a directory in which a downloaded pre-trained model
-            configuration should be cached if the standard cache should not be used.
-
-        force_download: (`optional`) boolean, default False:
-            Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
-
-        proxies: (`optional`) dict, default None:
-            A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-            The proxies are used on each request.
-
-        output_loading_info: (`optional`) boolean:
-            Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages.
-
-        kwargs: (`optional`) Remaining dictionary of keyword arguments:
-            Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attentions=True``). Behave differently depending on whether a `config` is provided or automatically loaded:
-
-            - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
-            - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.TFPretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.
-
+        r"""
         Examples::

-            model = TFAutoModelForTokenClassification.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
-            model = TFAutoModelForTokenClassification.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-            model = TFAutoModelForTokenClassification.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
-            assert model.config.output_attentions == True
-            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
-            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-            model = TFAutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+            >>> from transformers import AutoConfig, TFAutoModelForTokenClassification
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = TFAutoModelForTokenClassification.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = TFAutoModelForTokenClassification.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower)
+            >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json')
+            >>> model = TFAutoModelForTokenClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
         """
         config = kwargs.pop("config", None)
         if not isinstance(config, PretrainedConfig):
@@ -1700,3 +1289,101 @@ class TFAutoModelForTokenClassification:
                 ", ".join(c.__name__ for c in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()),
             )
         )
+
+
+class TFAutoModelForMultipleChoice:
+    r"""
+    This is a generic model class that will be instantiated as one of the model classes of the library---with a
+    multiple choice classification head---when created with the
+    :meth:`~transformers.TFAutoModelForMultipleChoice.from_pretrained` class method or the
+    :meth:`~transformers.TFAutoModelForMultipleChoice.from_config` class method.
+
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
+    """
+
+    def __init__(self):
+        raise EnvironmentError(
+            "TFAutoModelForMultipleChoice is designed to be instantiated "
+            "using the `TFAutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)` or "
+            "`TFAutoModelForMultipleChoice.from_config(config)` methods."
+        )
+
+    @classmethod
+    @replace_list_option_in_docstrings(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING, use_model_types=False)
+    def from_config(cls, config):
+        r"""
+        Instantiates one of the model classes of the library---with a multiple choice classification head---from a
+        configuration.
+
+        Note:
+            Loading a model from its configuration file does **not** load the model weights.
+            It only affects the model's configuration. Use
+            :meth:`~transformers.TFAutoModelForMultipleChoice.from_pretrained` to load the model weights.
+
+        Args:
+            config (:class:`~transformers.PretrainedConfig`):
+                The model class to instantiate is selected based on the configuration class:
+
+                List options
+
+        Examples::
+
+            from transformers import AutoConfig, TFAutoModelForMultipleChoice
+            # Download configuration from S3 and cache.
+            config = AutoConfig.from_pretrained('bert-base-uncased')
+            model = TFAutoModelForMultipleChoice.from_config(config)
+        """
+        for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
+            if isinstance(config, config_class):
+                return model_class(config)
+        raise ValueError(
+            "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n"
+            "Model type should be one of {}.".format(
+                config.__class__,
+                cls.__name__,
+                ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
+            )
+        )
+
+    @classmethod
+    @replace_list_option_in_docstrings(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING)
+    @add_start_docstrings(
+        "Instantiate one of the model classes of the library---with a multiple choice classification head---from a "
+        "pretrained model.",
+        TF_AUTO_MODEL_PRETRAINED_DOCSTRING,
+    )
+    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        r"""
+        Examples::
+
+            >>> from transformers import AutoConfig, TFAutoModelForMultipleChoice
+
+            >>> # Download model and configuration from S3 and cache.
+            >>> model = TFAutoModelForMultipleChoice.from_pretrained('bert-base-uncased')
+
+            >>> # Update configuration during loading
+            >>> model = TFAutoModelForMultipleChoice.from_pretrained('bert-base-uncased', output_attentions=True)
+            >>> model.config.output_attentions
+            True
+
+            >>> # Loading from a PyTorch checkpoint file instead of a TensorFlow model (slower)
+            >>> config = AutoConfig.from_json_file('./pt_model/bert_pt_model_config.json')
+            >>> model = TFAutoModelForMultipleChoice.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
+        """
+        config = kwargs.pop("config", None)
+        if not isinstance(config, PretrainedConfig):
+            config, kwargs = AutoConfig.from_pretrained(
+                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
+            )
+
+        for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
+            if isinstance(config, config_class):
+                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
+        raise ValueError(
+            "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n"
+            "Model type should be one of {}.".format(
+                config.__class__,
+                cls.__name__,
+                ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
+            )
+        )
diff --git a/src/transformers/tokenization_auto.py b/src/transformers/tokenization_auto.py
index d9197eb09..d450c76d0 100644
--- a/src/transformers/tokenization_auto.py
+++ b/src/transformers/tokenization_auto.py
@@ -117,12 +117,11 @@ SLOW_TOKENIZER_MAPPING = {k: v[0] for k, v in TOKENIZER_MAPPING.items()}


 class AutoTokenizer:
-    r""":class:`~transformers.AutoTokenizer` is a generic tokenizer class
-    that will be instantiated as one of the tokenizer classes of the library
-    when created with the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)`
-    class method.
+    r"""
+    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library
+    when created with the :meth:`AutoTokenizer.from_pretrained` class method.

-    This class cannot be instantiated using `__init__()` (throw an error).
+    This class cannot be instantiated directly using ``__init__()`` (throws an error).
     """

     def __init__(self):
@@ -134,45 +133,57 @@ class AutoTokenizer:
     @classmethod
     @replace_list_option_in_docstrings(SLOW_TOKENIZER_MAPPING)
     def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
-        r"""Instantiate one of the tokenizer classes of the library
-        from a pre-trained model vocabulary.
+        r"""
+        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

-        The tokenizer class to instantiate is selected
-        based on the `model_type` property of the config object, or when it's missing,
-        falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+        The tokenizer class to instantiate is selected based on the :obj:`model_type` property of the config object
+        (either passed as an argument or loaded from :obj:`pretrained_model_name_or_path` if possible), or when it's
+        missing, by falling back to using pattern matching on :obj:`pretrained_model_name_or_path`:

         List options

         Params:
-            pretrained_model_name_or_path: either:
+            pretrained_model_name_or_path (:obj:`str`):
+                Can be either:

-            - a string with the `shortcut name` of a predefined tokenizer to load from cache or download, e.g.: ``bert-base-uncased``.
-            - a string with the `identifier name` of a predefined tokenizer that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
-            - a path to a `directory` containing vocabulary files required by the tokenizer, for instance saved using the :func:`~transformers.PreTrainedTokenizer.save_pretrained` method, e.g.: ``./my_model_directory/``.
-            - (not applicable to all derived classes) a path or url to a single saved vocabulary file if and only if the tokenizer only requires a single vocabulary file (e.g. Bert, XLNet), e.g.: ``./my_model_directory/vocab.txt``.
-
-        cache_dir: (`optional`) string:
-            Path to a directory in which a downloaded predefined tokenizer vocabulary files should be cached if the standard cache should not be used.
-
-        force_download: (`optional`) boolean, default False:
-            Force to (re-)download the vocabulary files and override the cached versions if they exists.
-
-        resume_download: (`optional`) boolean, default False:
-            Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
-
-        proxies: (`optional`) dict, default None:
-            A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
-            The proxies are used on each request.
-
-        use_fast: (`optional`) boolean, default False:
-            Indicate if transformers should try to load the fast version of the tokenizer (True) or use the Python one (False).
-
-        inputs: (`optional`) positional arguments: will be passed to the Tokenizer ``__init__`` method.
-
-        kwargs: (`optional`) keyword arguments: will be passed to the Tokenizer ``__init__`` method. Can be used to set special tokens like ``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``. See parameters in the doc string of :class:`~transformers.PreTrainedTokenizer` for details.
+                - A string with the `shortcut name` of a predefined tokenizer to load from cache or download, e.g.,
+                  ``bert-base-uncased``.
+                - A string with the `identifier name` of a predefined tokenizer that was user-uploaded to our S3,
+                  e.g., ``dbmdz/bert-base-german-cased``.
+                - A path to a `directory` containing vocabulary files required by the tokenizer, for instance saved
+                  using the :func:`~transformers.PreTrainedTokenizer.save_pretrained` method, e.g.,
+                  ``./my_model_directory/``.
+                - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
+                  single vocabulary file (like Bert or XLNet), e.g.: ``./my_model_directory/vocab.txt``.
+                  (Not applicable to all derived classes)
+            inputs (additional positional arguments, `optional`):
+                Will be passed along to the Tokenizer ``__init__()`` method.
+            config (:class:`~transformers.PretrainedConfig`, `optional`):
+                The configuration object used to determine the tokenizer class to instantiate.
+            cache_dir (:obj:`str`, `optional`):
+                Path to a directory in which a downloaded pretrained model configuration should be cached if the
+                standard cache should not be used.
+            force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to force the (re-)download of the model weights and configuration files and override
+                the cached versions if they exist.
+            resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
+                file exists.
+            proxies (:obj:`Dict[str, str]`, `optional`):
+                A dictionary of proxy servers to use by protocol or endpoint, e.g.,
+                :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
+                request.
+            use_fast (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to try to load the fast version of the tokenizer.
+            kwargs (additional keyword arguments, `optional`):
+                Will be passed to the Tokenizer ``__init__()`` method. Can be used to set special tokens like
+                ``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``,
+                ``mask_token``, ``additional_special_tokens``. See the parameters of ``__init__()`` for more details.

         Examples::

+            from transformers import AutoTokenizer
+
+            # Download vocabulary from S3 and cache.
             tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
@@ -208,7 +219,10 @@ class AutoTokenizer:
         if isinstance(config, EncoderDecoderConfig):
             if type(config.decoder) is not type(config.encoder):  # noqa: E721
                 logger.warn(
-                    f"The encoder model config class: {config.encoder.__class__} is different from the decoder model config class: {config.decoder.__class}. It is not recommended to use the `AutoTokenizer.from_pretrained(..)` method in this case. Please use the encoder and decoder specific tokenizer classes."
+                    f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
+                    f"config class: {config.decoder.__class__}. It is not recommended to use the "
+                    "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
+                    "specific tokenizer classes."
                 )
             config = config.encoder
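
One pattern worth noting across all of the ``from_config()``/``from_pretrained()`` bodies touched above is the dispatch mechanism: each Auto class walks an ordered mapping from configuration classes to model classes and delegates to the first ``isinstance()`` match. A minimal self-contained sketch of that mechanism, with toy classes standing in for the real mapping entries:

.. code-block:: python

    # Simplified sketch of the Auto-class dispatch used throughout the diff;
    # the Toy* classes are stand-ins for the real config/model mapping entries.
    class ToyConfigA:
        pass

    class ToyConfigB:
        pass

    class ToyModelA:
        def __init__(self, config):
            self.config = config

    class ToyModelB:
        def __init__(self, config):
            self.config = config

    # Ordered mapping: more specific config classes must come first, because
    # dispatch returns the model class of the first isinstance() match.
    TOY_MAPPING = {ToyConfigA: ToyModelA, ToyConfigB: ToyModelB}

    def from_config(config):
        for config_class, model_class in TOY_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(f"Unrecognized configuration class {config.__class__}")

    assert isinstance(from_config(ToyConfigB()), ToyModelB)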
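
The ``Note`` blocks added above all make the same point: ``from_config()`` only builds the architecture, while ``from_pretrained()`` also loads weights. A short sketch of the distinction, using the same ``bert-base-uncased`` placeholder checkpoint as the docstring examples:

.. code-block:: python

    from transformers import AutoConfig, TFAutoModelForQuestionAnswering

    config = AutoConfig.from_pretrained('bert-base-uncased')
    config.output_attentions = True  # tweak the configuration before building the model

    # Architecture only: no pretrained weights are downloaded or loaded.
    model = TFAutoModelForQuestionAnswering.from_config(config)

    # Architecture plus pretrained weights.
    model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', config=config)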
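
Since the tokenizer ``kwargs`` are forwarded to ``__init__()``, special tokens can be overridden or extended at load time without subclassing. A minimal sketch assuming the standard ``bert-base-uncased`` vocabulary; the ``<ctx>`` marker is a hypothetical token added purely for illustration:

.. code-block:: python

    from transformers import AutoTokenizer

    # Extra keyword arguments are forwarded to the tokenizer's __init__(),
    # so special tokens can be registered at load time.
    tokenizer = AutoTokenizer.from_pretrained(
        'bert-base-uncased',
        additional_special_tokens=['<ctx>'],  # hypothetical marker token
    )

    # The added token is kept whole instead of being split into wordpieces.
    print(tokenizer.tokenize('<ctx> some context'))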
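
The ``EncoderDecoderConfig`` branch above silently falls back to the encoder's configuration (``config = config.encoder``), which is why the warning recommends the model-specific tokenizer classes when the two sides differ. A hedged sketch of that recommendation; the BERT-encoder/GPT-2-decoder pairing is illustrative and not taken from this diff:

.. code-block:: python

    from transformers import BertTokenizer, GPT2Tokenizer

    # For a mixed encoder-decoder model, instantiate each side's tokenizer
    # explicitly instead of relying on AutoTokenizer, which would pick the
    # encoder's tokenizer for both sides.
    encoder_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    decoder_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')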