From 4a5dfaedfc8e7b85b30daf4a12bc6b48eea708cf Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Wed, 29 Dec 2021 14:25:09 +0100 Subject: [PATCH] Update SB3 contrib doc (+ fix backward compat) (#707) * Fix `VecNormalize` load for SB3<= 1.3.0 * Update SB3 contrib doc * Bump version --- README.md | 3 ++- docs/guide/algos.rst | 3 ++- docs/guide/sb3_contrib.rst | 5 +++-- docs/misc/changelog.rst | 4 ++-- stable_baselines3/common/vec_env/vec_normalize.py | 2 +- stable_baselines3/version.txt | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4bd5364..f87bc6e 100644 --- a/README.md +++ b/README.md @@ -165,10 +165,11 @@ All the following examples can be executed online using Google colab notebooks: | DQN | :x: | :x: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | | HER | :x: | :heavy_check_mark: | :heavy_check_mark: | :x: | :x: | :x: | | PPO | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| QR-DQN[1](#f1) | :x: | :x: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | | SAC | :x: | :heavy_check_mark: | :x: | :x: | :x: | :heavy_check_mark: | | TD3 | :x: | :heavy_check_mark: | :x: | :x: | :x: | :heavy_check_mark: | -| QR-DQN[1](#f1) | :x: | :x: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | | TQC[1](#f1) | :x: | :heavy_check_mark: | :x: | :x: | :x: | :heavy_check_mark: | +| TRPO[1](#f1) | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | Maskable PPO[1](#f1) | :x: | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | 1: Implemented in [SB3 Contrib](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) GitHub repository. 
diff --git a/docs/guide/algos.rst b/docs/guide/algos.rst index e362f8a..7503ebd 100644 --- a/docs/guide/algos.rst +++ b/docs/guide/algos.rst @@ -13,10 +13,11 @@ DDPG ✔️ ❌ ❌ ❌ DQN ❌ ✔️ ❌ ❌ ✔️ HER ✔️ ✔️ ❌ ❌ ❌ PPO ✔️ ✔️ ✔️ ✔️ ✔️ +QR-DQN [#f1]_ ❌ ️ ✔️ ❌ ❌ ✔️ SAC ✔️ ❌ ❌ ❌ ✔️ TD3 ✔️ ❌ ❌ ❌ ✔️ -QR-DQN [#f1]_ ❌ ️ ✔️ ❌ ❌ ✔️ TQC [#f1]_ ✔️ ❌ ❌ ❌ ✔️ +TRPO [#f1]_ ✔️ ✔️ ✔️ ✔️ ✔️ Maskable PPO [#f1]_ ❌ ✔️ ✔️ ✔️ ✔️ =================== =========== ============ ================= =============== ================ diff --git a/docs/guide/sb3_contrib.rst b/docs/guide/sb3_contrib.rst index 3057564..6d0ab13 100644 --- a/docs/guide/sb3_contrib.rst +++ b/docs/guide/sb3_contrib.rst @@ -8,7 +8,7 @@ We implement experimental features in a separate contrib repository: `SB3-Contrib`_ This allows Stable-Baselines3 (SB3) to maintain a stable and compact core, while still -providing the latest features, like Truncated Quantile Critics (TQC) or +providing the latest features, like Truncated Quantile Critics (TQC), Trust Region Policy Optimization (TRPO) or Quantile Regression DQN (QR-DQN). Why create this repository? @@ -36,8 +36,9 @@ See documentation for the full list of included features. 
**RL Algorithms**: -- `Truncated Quantile Critics (TQC)`_ - `Quantile Regression DQN (QR-DQN)`_ +- `Truncated Quantile Critics (TQC)`_ +- `Trust Region Policy Optimization (TRPO) <https://arxiv.org/abs/1502.05477>`_ - `PPO with invalid action masking (Maskable PPO) <https://arxiv.org/abs/2006.14171>`_ **Gym Wrappers**: diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 989e021..85598f7 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -4,7 +4,7 @@ Changelog ========== -Release 1.3.1a7 (WIP) +Release 1.3.1a8 (WIP) --------------------------- Breaking Changes: @@ -53,7 +53,7 @@ Documentation: - Added link to RL Tips and Tricks video - Updated ``BaseAlgorithm.load`` docstring (@Demetrio92) - Added a note on ``load`` behavior in the examples (@Demetrio92) - +- Updated SB3 Contrib doc Release 1.3.0 (2021-10-23) --------------------------- diff --git a/stable_baselines3/common/vec_env/vec_normalize.py b/stable_baselines3/common/vec_env/vec_normalize.py index 0448adf..3adf0e7 100644 --- a/stable_baselines3/common/vec_env/vec_normalize.py +++ b/stable_baselines3/common/vec_env/vec_normalize.py @@ -117,7 +117,7 @@ class VecNormalize(VecEnvWrapper): :param state:""" # Backward compatibility - if "norm_obs_keys" not in state: + if "norm_obs_keys" not in state and isinstance(state["observation_space"], gym.spaces.Dict): state["norm_obs_keys"] = list(state["observation_space"].spaces.keys()) self.__dict__.update(state) assert "venv" not in state diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt index f625807..b750066 100644 --- a/stable_baselines3/version.txt +++ b/stable_baselines3/version.txt @@ -1 +1 @@ -1.3.1a7 +1.3.1a8