From 88e1be9ff5e04b7688efa44951f845b7daf5717f Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Sun, 23 May 2021 13:13:11 +0200 Subject: [PATCH] Documentation update (#450) * Update migration guide * Add sanity check * Removed parameter ``channels_last`` from ``is_image_space`` * Pin docutils * Clarify callback `save_freq` definition * Update docs/misc/changelog.rst * Update docs/misc/changelog.rst * Fix typos Co-authored-by: Anssi --- docs/conda_env.yml | 3 +++ docs/guide/callbacks.rst | 9 ++++++++- docs/guide/migration.rst | 3 ++- docs/misc/changelog.rst | 8 +++++++- stable_baselines3/common/callbacks.py | 17 +++++++++++++++-- stable_baselines3/common/preprocessing.py | 11 ++++------- stable_baselines3/common/torch_layers.py | 2 +- stable_baselines3/ppo/ppo.py | 9 ++++++++- tests/test_cnn.py | 7 ++++++- 9 files changed, 54 insertions(+), 15 deletions(-) diff --git a/docs/conda_env.yml b/docs/conda_env.yml index 9ea054e..290b499 100644 --- a/docs/conda_env.yml +++ b/docs/conda_env.yml @@ -15,3 +15,6 @@ dependencies: - numpy - matplotlib - sphinx_autodoc_typehints + # Tmp fix, docutils==0.17 breaks rtd theme + # See https://github.com/readthedocs/sphinx_rtd_theme/issues/1115 + - docutils==0.16 diff --git a/docs/guide/callbacks.rst b/docs/guide/callbacks.rst index f5d9d02..810e367 100644 --- a/docs/guide/callbacks.rst +++ b/docs/guide/callbacks.rst @@ -155,9 +155,16 @@ Stable Baselines provides you with a set of common callbacks for: CheckpointCallback ^^^^^^^^^^^^^^^^^^ -Callback for saving a model every ``save_freq`` steps, you must specify a log folder (``save_path``) +Callback for saving a model every ``save_freq`` calls to ``env.step()``, you must specify a log folder (``save_path``) and optionally a prefix for the checkpoints (``rl_model`` by default). +.. warning:: + + When using multiple environments, each call to ``env.step()`` will effectively correspond to ``n_envs`` steps. + If you want the ``save_freq`` to be similar when using different number of environments, + you need to account for it using ``save_freq = max(save_freq // n_envs, 1)``. + The same goes for the other callbacks. + .. code-block:: python diff --git a/docs/guide/migration.rst b/docs/guide/migration.rst index 9b25f95..c9c15cd 100644 --- a/docs/guide/migration.rst +++ b/docs/guide/migration.rst @@ -46,7 +46,8 @@ Breaking Changes - SB3 requires python 3.6+ (instead of python 3.5+ for SB2) - Dropped MPI support -- Dropped layer normalized policies (e.g. ``LnMlpPolicy``) +- Dropped layer normalized policies (``MlpLnLstmPolicy``, ``CnnLnLstmPolicy``) +- LSTM policies (```MlpLstmPolicy```, ```CnnLstmPolicy```) are not supported for the time being - Dropped parameter noise for DDPG and DQN - PPO is now closer to the original implementation (no clipping of the value function by default), cf PPO section below - Orthogonal initialization is only used by A2C/PPO diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 6173221..0794a3f 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -27,6 +27,7 @@ Breaking Changes: - Updated the KL Divergence estimator in the PPO algorithm to be positive definite and have lower variance (@09tangriro) - Updated the KL Divergence check in the PPO algorithm to be before the gradient update step rather than after end of epoch (@09tangriro) +- Removed parameter ``channels_last`` from ``is_image_space`` as it can be inferred. New Features: ^^^^^^^^^^^^^ @@ -62,6 +63,7 @@ Others: - Added Code of Conduct - Added tests for GAE and lambda return computation - Updated distribution entropy test (thanks @09tangriro) +- Added sanity check ``batch_size > 1`` in PPO to avoid NaN in advantage normalization Documentation: ^^^^^^^^^^^^^^ @@ -75,6 +77,10 @@ Documentation: - Added example for using ``ProcgenEnv`` - Added note about advanced custom policy example for off-policy algorithms - Fixed DQN unicode checkmarks +- Updated migration guide (@juancroldan) +- Pinned ``docutils==0.16`` to avoid issue with rtd theme +- Clarified callback ``save_freq`` definition + Release 1.0 (2021-03-15) ------------------------ @@ -690,4 +696,4 @@ And all the contributors: @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray @tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn -@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx +@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py index fa806a8..4684383 100644 --- a/stable_baselines3/common/callbacks.py +++ b/stable_baselines3/common/callbacks.py @@ -212,7 +212,14 @@ class CallbackList(BaseCallback): class CheckpointCallback(BaseCallback): """ - Callback for saving a model every ``save_freq`` steps + Callback for saving a model every ``save_freq`` calls + to ``env.step()``. + + .. warning:: + + When using multiple environments, each call to ``env.step()`` + will effectively correspond to ``n_envs`` steps. + To account for that, you can use ``save_freq = max(save_freq // n_envs, 1)`` :param save_freq: :param save_path: Path to the folder where the model will be saved. @@ -262,11 +269,17 @@ class EvalCallback(EventCallback): """ Callback for evaluating an agent. + .. warning:: + + When using multiple environments, each call to ``env.step()`` + will effectively correspond to ``n_envs`` steps. + To account for that, you can use ``eval_freq = max(eval_freq // n_envs, 1)`` + :param eval_env: The environment used for initialization :param callback_on_new_best: Callback to trigger when there is a new best model according to the ``mean_reward`` :param n_eval_episodes: The number of episodes to test the agent - :param eval_freq: Evaluate the agent every eval_freq call of the callback. + :param eval_freq: Evaluate the agent every ``eval_freq`` call of the callback. :param log_path: Path to a folder where the evaluations (``evaluations.npz``) will be saved. It will be updated at each evaluation. :param best_model_save_path: Path to a folder where the best model diff --git a/stable_baselines3/common/preprocessing.py b/stable_baselines3/common/preprocessing.py index 7aaeb12..01422aa 100644 --- a/stable_baselines3/common/preprocessing.py +++ b/stable_baselines3/common/preprocessing.py @@ -26,19 +26,16 @@ def is_image_space_channels_first(observation_space: spaces.Box) -> bool: def is_image_space( observation_space: spaces.Space, - channels_last: bool = True, check_channels: bool = False, ) -> bool: """ Check if a observation space has the shape, limits and dtype of a valid image. - The check is conservative, so that it returns False - if there is a doubt. + The check is conservative, so that it returns False if there is a doubt. Valid images: RGB, RGBD, GrayScale with values in [0, 255] :param observation_space: - :param channels_last: :param check_channels: Whether to do or not the check for the number of channels. e.g., with frame-stacking, the observation space may have more channels than expected. :return: @@ -56,10 +53,10 @@ def is_image_space( if not check_channels: return True # Check the number of channels - if channels_last: - n_channels = observation_space.shape[-1] - else: + if is_image_space_channels_first(observation_space): n_channels = observation_space.shape[0] + else: + n_channels = observation_space.shape[-1] # RGB, RGBD, GrayScale return n_channels in [1, 3, 4] return False diff --git a/stable_baselines3/common/torch_layers.py b/stable_baselines3/common/torch_layers.py index 16088f3..71b647b 100644 --- a/stable_baselines3/common/torch_layers.py +++ b/stable_baselines3/common/torch_layers.py @@ -64,7 +64,7 @@ class NatureCNN(BaseFeaturesExtractor): super(NatureCNN, self).__init__(observation_space, features_dim) # We assume CxHxW images (channels first) # Re-ordering will be done by pre-preprocessing or wrapper - assert is_image_space(observation_space), ( + assert is_image_space(observation_space, check_channels=False), ( "You should use NatureCNN " f"only with images not with {observation_space}\n" "(you are probably using `CnnPolicy` instead of `MlpPolicy` or `MultiInputPolicy`)\n" diff --git a/stable_baselines3/ppo/ppo.py b/stable_baselines3/ppo/ppo.py index 34737c7..660eb32 100644 --- a/stable_baselines3/ppo/ppo.py +++ b/stable_baselines3/ppo/ppo.py @@ -118,6 +118,13 @@ class PPO(OnPolicyAlgorithm): spaces.MultiBinary, ), ) + + # Sanity check, otherwise it will lead to noisy gradient and NaN + # because of the advantage normalization + assert ( + batch_size > 1 + ), "`batch_size` must be greater than 1. See https://github.com/DLR-RM/stable-baselines3/issues/440" + if self.env is not None: # Check that `n_steps * n_envs > 1` to avoid NaN # when doing advantage normalization @@ -133,7 +140,7 @@ class PPO(OnPolicyAlgorithm): f" but because the `RolloutBuffer` is of size `n_steps * n_envs = {buffer_size}`," f" after every {untruncated_batches} untruncated mini-batches," f" there will be a truncated mini-batch of size {buffer_size % batch_size}\n" - f"We recommend using a `batch_size` that is a multiple of `n_steps * n_envs`.\n" + f"We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n" f"Info: (n_steps={self.n_steps} and n_envs={self.env.num_envs})" ) self.batch_size = batch_size diff --git a/tests/test_cnn.py b/tests/test_cnn.py index 5cac58e..8ec33fb 100644 --- a/tests/test_cnn.py +++ b/tests/test_cnn.py @@ -247,7 +247,12 @@ def test_image_space_checks(): assert not is_image_space(not_image_space) an_image_space = spaces.Box(0, 255, shape=(10, 10, 3), dtype=np.uint8) - assert is_image_space(an_image_space) + assert is_image_space(an_image_space, check_channels=False) + assert is_image_space(an_image_space, check_channels=True) + + channel_first_image_space = spaces.Box(0, 255, shape=(3, 10, 10), dtype=np.uint8) + assert is_image_space(channel_first_image_space, check_channels=False) + assert is_image_space(channel_first_image_space, check_channels=True) an_image_space_with_odd_channels = spaces.Box(0, 255, shape=(10, 10, 5), dtype=np.uint8) assert is_image_space(an_image_space_with_odd_channels)