From b02afd6ee3c583fa0fb189e31fff6e56c8c95344 Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Mon, 11 May 2020 12:28:43 +0200 Subject: [PATCH] Doc update (#15) --- .readthedocs.yml | 22 ++++++++++++++++++++++ docs/conf.py | 2 +- docs/guide/developer.rst | 22 +++++++++++----------- docs/misc/changelog.rst | 1 + docs/requirements.txt | 3 +++ 5 files changed, 38 insertions(+), 12 deletions(-) create mode 100644 .readthedocs.yml create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..39d81bd --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,22 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: docs/requirements.txt diff --git a/docs/conf.py b/docs/conf.py index 5768fc6..41df582 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -77,6 +77,7 @@ extensions = [ 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', # 'sphinx.ext.intersphinx', + # 'sphinx.ext.doctest' ] if enable_spell_check: @@ -218,5 +219,4 @@ texinfo_documents = [ # kornia's hack to get rtd builder to install latest pytorch if on_rtd: - os.system('pip install gym') os.system('pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html') diff --git a/docs/guide/developer.rst b/docs/guide/developer.rst index 145a49f..17a0d54 100644 --- a/docs/guide/developer.rst +++ b/docs/guide/developer.rst @@ -4,7 +4,7 @@ Developer Guide ================ -This guide is meant for those who wants to understand the internals and the 
design choices of Stable-Baselines3. +This guide is meant for those who want to understand the internals and the design choices of Stable-Baselines3. At first, you should read the two issues where the design choices were discussed: @@ -19,14 +19,14 @@ The library is not meant to be modular, although inheritance is used to reduce c Algorithms Structure ==================== -Each algorithm (on-policy and off-policy ones) follow a common structure. +Each algorithm (on-policy and off-policy ones) follows a common structure. There is one folder per algorithm, and in that folder there is the algorithm and the policy definition (``policies.py``). -Each algorithm had two main methods: +Each algorithm has two main methods: - ``.collect_rollouts()`` which defines how new samples are collected, usually inherited from the base class. Those samples are then stored in a ``RolloutBuffer`` (discarded after the gradient update) or ``ReplayBuffer`` -- ``.train()`` which update the parameters using samples from the buffer +- ``.train()`` which updates the parameters using samples from the buffer Where to start? @@ -34,16 +34,16 @@ Where to start? The first thing you need to read and understand are the base classes in the ``common/`` folder: -- ``BaseRLModel`` in ``base_class.py`` which defines how an RL class should look like - it contains also all the "glue code" for saving/loading and the common operations (wrapping environments) +- ``BaseRLModel`` in ``base_class.py`` which defines how an RL class should look like. + It contains also all the "glue code" for saving/loading and the common operations (wrapping environments) -- ``BasePolicy`` in ``policies.py`` which defines how a policy class should look like - it contains also all the magic for the ``.predict()`` method, to handle as many cases as possible +- ``BasePolicy`` in ``policies.py`` which defines how a policy class should look like. 
+ It contains also all the magic for the ``.predict()`` method, to handle as many cases as possible - ``OffPolicyRLModel`` in ``base_class.py`` that contains the implementation of ``collect_rollouts()`` for the off-policy algorithms -All the environments handled internally are assume to be ``VecEnv`` (``gym.Env`` are automatically wrapped). +All the environments handled internally are assumed to be ``VecEnv`` (``gym.Env`` are automatically wrapped). Pre-Processing @@ -59,7 +59,7 @@ Policy Structure ================ When we refer to "policy" in Stable-Baselines3, this is usually an abuse of language compared to RL terminology. -In SB3, "Policy" refers to the class that handle all the networks useful for training, +In SB3, "policy" refers to the class that handles all the networks useful for training, so not only the network used to predict actions (the "learned controller"). For instance, the ``TD3`` policy contains the actor, the critic and the target networks. @@ -69,7 +69,7 @@ Probability distributions When needed, the policies handle the different probability distributions. All distributions are located in ``common/distributions.py`` and follow the same interface. -Each distribution correspond to a type of action space (e.g. ``Categorical`` is the one used for discrete actions. +Each distribution corresponds to a type of action space (e.g. ``Categorical`` is the one used for discrete actions. 
For continuous actions, we can use multiple distributions ("DiagGaussian", "SquashedGaussian" or "StateDependentDistribution") State-Dependent Exploration diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 21d5f1f..c91bbf0 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -33,6 +33,7 @@ Others: - Added Dockerfile - Sync ``VecEnvs`` with Stable-Baselines - Update requirement: ``gym>=0.17`` +- Added ``.readthedocs.yml`` file Documentation: ^^^^^^^^^^^^^^ diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..0878e54 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +gym>=0.17 +pandas +matplotlib