From 23bffc4f149916aff57e9fe31924a59baf6752c0 Mon Sep 17 00:00:00 2001
From: zou3519 <zou3519@gmail.com>
Date: Sun, 13 Oct 2019 10:32:28 -0700
Subject: [PATCH] Fix most documentation warnings (#27782)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/27782

Warnings show up when running `make html` to build documentation. All of
the warnings are very reasonable and point to bugs in our docs. This PR
attempts to fix most of those warnings.

In the future we will add something to the CI that asserts that there
are no warnings in our docs.

Test Plan: - build and view changes locally

Differential Revision: D17887067

Pulled By: zou3519

fbshipit-source-id: 6bf4d08764759133b20983d6cd7f5d27e5ee3166
---
 docs/source/autograd.rst                      |  2 ++
 docs/source/community/contribution_guide.rst  |  2 +-
 docs/source/community/persons_of_interest.rst |  3 +--
 docs/source/cuda.rst                          |  4 +++-
 docs/source/distributed.rst                   |  2 +-
 docs/source/index.rst                         |  1 +
 docs/source/multiprocessing.rst               |  3 +++
 docs/source/quantization.rst                  |  2 +-
 docs/source/random.rst                        |  7 +++---
 docs/source/tensors.rst                       | 13 ++++++++---
 docs/source/torch.rst                         | 14 +++++++-----
 torch/_tensor_docs.py                         |  1 +
 torch/_torch_docs.py                          |  6 ++---
 torch/cuda/streams.py                         | 22 +++++++------------
 .../lowrank_multivariate_normal.py            |  2 ++
 .../quantized/modules/functional_modules.py   |  4 ++--
 torch/quantization/observer.py                |  2 +-
 17 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/docs/source/autograd.rst b/docs/source/autograd.rst
index ed09e4c3084..335d15e7b6f 100644
--- a/docs/source/autograd.rst
+++ b/docs/source/autograd.rst
@@ -67,6 +67,8 @@ Tensor autograd functions
 .. autoclass:: torch.Tensor
    :members: grad, requires_grad, is_leaf, backward, detach, detach_, register_hook, retain_grad
 
+   :noindex:
+
 :hidden:`Function`
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/docs/source/community/contribution_guide.rst b/docs/source/community/contribution_guide.rst
index a09a149ae35..628aec71e0a 100644
--- a/docs/source/community/contribution_guide.rst
+++ b/docs/source/community/contribution_guide.rst
@@ -49,7 +49,7 @@ here is the basic process.
       operator/optimizer?” Giving evidence for its utility, e.g., usage
       in peer reviewed papers, or existence in other frameworks, helps a
       bit when making this case.
-      - **Adding operators / algorithms from recently-released research** 
+      - **Adding operators / algorithms from recently-released research**
         is generally not accepted, unless there is overwhelming evidence that
         this newly published work has ground-breaking results and will eventually
         become a standard in the field. If you are not sure where your method falls,
diff --git a/docs/source/community/persons_of_interest.rst b/docs/source/community/persons_of_interest.rst
index 6e6a90d63d9..959275502cd 100644
--- a/docs/source/community/persons_of_interest.rst
+++ b/docs/source/community/persons_of_interest.rst
@@ -63,8 +63,7 @@ Distributed
 
 -  Pieter Noordhuis (`pietern <https://github.com/pietern>`__)
 -  Shen Li (`mrshenli <https://github.com/mrshenli>`__)
-..
- -  (proposed) Pritam Damania
+-  (proposed) Pritam Damania
    (`pritamdamania87 <https://github.com/pritamdamania87>`__)
 
 Multiprocessing and DataLoaders
diff --git a/docs/source/cuda.rst b/docs/source/cuda.rst
index e3dac806090..056ee3da0a5 100644
--- a/docs/source/cuda.rst
+++ b/docs/source/cuda.rst
@@ -52,7 +52,9 @@ Memory management
 .. autofunction:: reset_max_memory_allocated
 .. autofunction:: memory_reserved
 .. autofunction:: max_memory_reserved
-.. autofunction:: reset_max_memory_reserved
+.. FIXME The following doesn't seem to exist. Is it supposed to?
+   https://github.com/pytorch/pytorch/issues/27785
+   .. autofunction:: reset_max_memory_reserved
 .. autofunction:: memory_cached
 .. autofunction:: max_memory_cached
 .. autofunction:: reset_max_memory_cached
diff --git a/docs/source/distributed.rst b/docs/source/distributed.rst
index f0d35df46ea..c38b26fc05c 100644
--- a/docs/source/distributed.rst
+++ b/docs/source/distributed.rst
@@ -410,7 +410,7 @@ both python2 and python3.
 Spawn utility
 -------------
 
-The :doc:`torch.multiprocessing` package also provides a ``spawn``
+The :ref:`multiprocessing-doc` package also provides a ``spawn``
 function in :func:`torch.multiprocessing.spawn`. This helper function
 can be used to spawn multiple processes. It works by passing in the
 function that you want to run and spawns N processes to run it. This
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 357b8088b6f..cb9a044b983 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -16,6 +16,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
    :caption: Notes
 
    notes/*
+
 * `PyTorch on XLA Devices <http://pytorch.org/xla/>`_
 
 .. toctree::
diff --git a/docs/source/multiprocessing.rst b/docs/source/multiprocessing.rst
index f45563e23b6..c057dc38121 100644
--- a/docs/source/multiprocessing.rst
+++ b/docs/source/multiprocessing.rst
@@ -1,3 +1,7 @@
+:orphan:
+
+.. _multiprocessing-doc:
+
 Multiprocessing package - torch.multiprocessing
 ===============================================
 
diff --git a/docs/source/quantization.rst b/docs/source/quantization.rst
index 6e3c04e87e6..5b5bf3301e8 100644
--- a/docs/source/quantization.rst
+++ b/docs/source/quantization.rst
@@ -373,7 +373,7 @@ Top-level quantization APIs
 .. autofunction:: convert
 .. autoclass:: QConfig
 .. autoclass:: QConfigDynamic
-.. autoattr:: default_qconfig
+.. autoattribute:: default_qconfig
 
 Preparing model for quantization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/random.rst b/docs/source/random.rst
index c7eaa945a72..150a8e5f484 100644
--- a/docs/source/random.rst
+++ b/docs/source/random.rst
@@ -8,14 +8,13 @@ torch.random
 
 Random Number Generator
 -------------------------
+.. FIXME: We're missing docs for the CUDA RNG functions (torch.cuda.random).
+   https://github.com/pytorch/pytorch/issues/27778
+
 .. autofunction:: get_rng_state
-.. autofunction:: get_rng_state_all
 .. autofunction:: set_rng_state
-.. autofunction:: set_rng_state_all
 .. autofunction:: manual_seed
-.. autofunction:: manual_seed_all
 .. autofunction:: seed
-.. autofunction:: seed_all
 .. autofunction:: initial_seed
 .. autofunction:: fork_rng
 
diff --git a/docs/source/tensors.rst b/docs/source/tensors.rst
index 285d647fb9e..e103c5150db 100644
--- a/docs/source/tensors.rst
+++ b/docs/source/tensors.rst
@@ -148,6 +148,7 @@ view of a storage and defines numeric operations on it.
    .. autoattribute:: is_cuda
    .. autoattribute:: device
    .. autoattribute:: grad
+      :noindex:
    .. autoattribute:: ndim
    .. autoattribute:: T
 
@@ -183,6 +184,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: atan2_
    .. automethod:: atan_
    .. automethod:: backward
+      :noindex:
    .. automethod:: baddbmm
    .. automethod:: baddbmm_
    .. automethod:: bernoulli
@@ -222,7 +224,9 @@ view of a storage and defines numeric operations on it.
    .. automethod:: det
    .. automethod:: dense_dim
    .. automethod:: detach
+      :noindex:
    .. automethod:: detach_
+      :noindex:
    .. automethod:: diag
    .. automethod:: diag_embed
    .. automethod:: diagflat
@@ -295,12 +299,13 @@ view of a storage and defines numeric operations on it.
    .. automethod:: irfft
    .. automethod:: is_contiguous
    .. automethod:: is_floating_point
-   .. automethod:: is_leaf
+   .. autoattribute:: is_leaf
+      :noindex:
    .. automethod:: is_pinned
    .. automethod:: is_set_to
    .. automethod:: is_shared
    .. automethod:: is_signed
-   .. automethod:: is_sparse
+   .. autoattribute:: is_sparse
    .. automethod:: item
    .. automethod:: kthvalue
    .. automethod:: le
@@ -382,6 +387,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: reciprocal_
    .. automethod:: record_stream
    .. automethod:: register_hook
+      :noindex:
    .. automethod:: remainder
    .. automethod:: remainder_
    .. automethod:: real
@@ -389,13 +395,14 @@ view of a storage and defines numeric operations on it.
    .. automethod:: renorm_
    .. automethod:: repeat
    .. automethod:: repeat_interleave
-   .. automethod:: requires_grad
+   .. autoattribute:: requires_grad
    .. automethod:: requires_grad_
    .. automethod:: reshape
    .. automethod:: reshape_as
    .. automethod:: resize_
    .. automethod:: resize_as_
    .. automethod:: retain_grad
+      :noindex:
    .. automethod:: rfft
    .. automethod:: roll
    .. automethod:: rot90
diff --git a/docs/source/torch.rst b/docs/source/torch.rst
index bb19a6fd069..8fa0103450a 100644
--- a/docs/source/torch.rst
+++ b/docs/source/torch.rst
@@ -93,10 +93,13 @@ Random sampling
 .. autofunction:: set_rng_state
 .. autoattribute:: torch.default_generator
    :annotation:  Returns the default CPU torch.Generator
-.. autoattribute:: torch.cuda.default_generators
-   :annotation:  If cuda is available, returns a tuple of default CUDA torch.Generator-s.
-                 The number of CUDA torch.Generator-s returned is equal to the number of
-                 GPUs available in the system.
+
+.. FIXME: The following doesn't actually seem to exist.
+   https://github.com/pytorch/pytorch/issues/27780
+   .. autoattribute:: torch.cuda.default_generators
+      :annotation:  If cuda is available, returns a tuple of default CUDA torch.Generator-s.
+                    The number of CUDA torch.Generator-s returned is equal to the number of
+                    GPUs available in the system.
 .. autofunction:: bernoulli
 .. autofunction:: multinomial
 .. autofunction:: normal
@@ -151,8 +154,7 @@ The context managers :func:`torch.no_grad`, :func:`torch.enable_grad`, and
 :func:`torch.set_grad_enabled` are helpful for locally disabling and enabling
 gradient computation. See :ref:`locally-disable-grad` for more details on
 their usage.  These context managers are thread local, so they won't
-work if you send work to another thread using the :module:`threading`
-module, etc.
+work if you send work to another thread using the ``threading`` module, etc.
 
 Examples::
 
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
index 2b6f6e20f89..e56e57c5806 100644
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -399,6 +399,7 @@ Example::
     tensor([[False, True]], dtype=torch.bool)
     >>> a.any()
     tensor(True, dtype=torch.bool)
+
 .. function:: any(dim, keepdim=False, out=None) -> Tensor
 
 Returns True if any elements in each row of the tensor in the given
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py
index 4fe0e1a92a7..a999f833ed7 100644
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -6167,7 +6167,7 @@ Calculates determinant of a square matrix or batches of square matrices.
     :meth:`~torch.svd` for details.
 
 Arguments:
-    input (Tensor): the input tensor of size (*, n, n) where `*` is zero or more
+    input (Tensor): the input tensor of size ``(*, n, n)`` where ``*`` is zero or more
                 batch dimensions.
 
 Example::
@@ -6254,7 +6254,7 @@ Calculates log determinant of a square matrix or batches of square matrices.
     :meth:`~torch.svd` for details.
 
 Arguments:
-    input (Tensor): the input tensor of size (*, n, n) where `*` is zero or more
+    input (Tensor): the input tensor of size ``(*, n, n)`` where ``*`` is zero or more
                 batch dimensions.
 
 Example::
@@ -6295,7 +6295,7 @@ Calculates the sign and log absolute value of the determinant(s) of a square mat
     See :meth:`~torch.svd` for details.
 
 Arguments:
-    input (Tensor): the input tensor of size (*, n, n) where `*` is zero or more
+    input (Tensor): the input tensor of size ``(*, n, n)`` where ``*`` is zero or more
                 batch dimensions.
 
 Returns:
diff --git a/torch/cuda/streams.py b/torch/cuda/streams.py
index 1a69a4f9660..96f6c0383b1 100644
--- a/torch/cuda/streams.py
+++ b/torch/cuda/streams.py
@@ -27,13 +27,13 @@ class Stream(torch._C._CudaStreamBase):
         Arguments:
             event (Event): an event to wait for.
 
-        .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see `CUDA
-           documentation`_ for more info.
+        .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see
+           `CUDA Stream documentation`_ for more info.
 
            This function returns without waiting for :attr:`event`: only future
            operations are affected.
 
-        .. _CUDA documentation:
+        .. _CUDA Stream documentation:
            http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
         """
         event.wait(self)
@@ -78,10 +78,7 @@ class Stream(torch._C._CudaStreamBase):
         r"""Wait for all the kernels in this stream to complete.
 
         .. note:: This is a wrapper around ``cudaStreamSynchronize()``: see
-           `CUDA documentation`_ for more info.
-
-        .. _CUDA documentation:
-           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
+           `CUDA Stream documentation`_ for more info.
         """
         super(Stream, self).synchronize()
 
@@ -121,8 +118,8 @@ class Event(torch._C._CudaEventBase):
         interprocess (bool): if ``True``, the event can be shared between processes
             (default: ``False``)
 
-       .. _CUDA documentation:
-       https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
+    .. _CUDA Event documentation:
+       https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
     """
 
     def __new__(cls, enable_timing=False, blocking=False, interprocess=False):
@@ -174,11 +171,8 @@ class Event(torch._C._CudaEventBase):
         Waits until the completion of all work currently captured in this event.
         This prevents the CPU thread from proceeding until the event completes.
 
-         .. note:: This is a wrapper around ``cudaEventSynchronize()``: see `CUDA
-           documentation`_ for more info.
-
-        .. _CUDA documentation:
-           https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
+        .. note:: This is a wrapper around ``cudaEventSynchronize()``: see
+           `CUDA Event documentation`_ for more info.
         """
         super(Event, self).synchronize()
 
diff --git a/torch/distributions/lowrank_multivariate_normal.py b/torch/distributions/lowrank_multivariate_normal.py
index 56822a04a99..78246c5a269 100644
--- a/torch/distributions/lowrank_multivariate_normal.py
+++ b/torch/distributions/lowrank_multivariate_normal.py
@@ -47,6 +47,7 @@ class LowRankMultivariateNormal(Distribution):
     r"""
     Creates a multivariate normal distribution with covariance matrix having a low-rank form
     parameterized by :attr:`cov_factor` and :attr:`cov_diag`::
+
         covariance_matrix = cov_factor @ cov_factor.T + cov_diag
 
     Example:
@@ -69,6 +70,7 @@ class LowRankMultivariateNormal(Distribution):
         `matrix determinant lemma <https://en.wikipedia.org/wiki/Matrix_determinant_lemma>`_.
         Thanks to these formulas, we just need to compute the determinant and inverse of
         the small size "capacitance" matrix::
+
             capacitance = I + cov_factor.T @ inv(cov_diag) @ cov_factor
     """
     arg_constraints = {"loc": constraints.real,
diff --git a/torch/nn/quantized/modules/functional_modules.py b/torch/nn/quantized/modules/functional_modules.py
index f917e1bfb2d..3cb3e7bc8d8 100644
--- a/torch/nn/quantized/modules/functional_modules.py
+++ b/torch/nn/quantized/modules/functional_modules.py
@@ -13,7 +13,7 @@ class FloatFunctional(torch.nn.Module):
         This class does not provide a ``forward`` hook. Instead, you must use
         one of the underlying functions (e.g. ``add``).
 
-    .. Examples::
+    Examples::
 
         >>> f_add = FloatFunctional()
         >>> a = torch.tensor(3.0)
@@ -91,7 +91,7 @@ class QFunctional(torch.nn.Module):
         This class does not provide a ``forward`` hook. Instead, you must use
         one of the underlying functions (e.g. ``add``).
 
-    .. Examples::
+    Examples::
 
         >>> q_add = QFunctional('add')
         >>> a = torch.quantize_per_tensor(torch.tensor(3.0), 1.0, 0, torch.qint32)
diff --git a/torch/quantization/observer.py b/torch/quantization/observer.py
index 6aebb046d7f..b18c6d1ef52 100644
--- a/torch/quantization/observer.py
+++ b/torch/quantization/observer.py
@@ -639,7 +639,7 @@ class RecordingObserver(_ObserverBase):
 class NoopObserver(Observer):
     r"""
     Observer that doesn't do anything and just passes its configuration to the
-    quantized module's ``.from_float()`.
+    quantized module's ``.from_float()``.
 
     Primarily used for quantization to float16 which doesn't require determining
     ranges.