From cd6e04705bfc9606a518d8ebee7084045389f3ff Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Tue, 18 Jan 2022 15:10:25 +0100 Subject: [PATCH] Update SB3 Contrib doc (ARS) and W&B integration (#726) * Add ARS to SB3 contrib * Add integration page --- README.md | 1 + docs/guide/algos.rst | 1 + docs/guide/integrations.rst | 49 +++++++++++++++++++++++++++++++++++++ docs/guide/sb3_contrib.rst | 3 ++- docs/index.rst | 1 + docs/misc/changelog.rst | 6 +++++ 6 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 docs/guide/integrations.rst diff --git a/README.md b/README.md index f87bc6e..8c71a86 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,7 @@ All the following examples can be executed online using Google colab notebooks: | **Name** | **Recurrent** | `Box` | `Discrete` | `MultiDiscrete` | `MultiBinary` | **Multi Processing** | | ------------------- | ------------------ | ------------------ | ------------------ | ------------------- | ------------------ | --------------------------------- | +| ARS[1](#f1) | :x: | :heavy_check_mark: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | | A2C | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | DDPG | :x: | :heavy_check_mark: | :x: | :x: | :x: | :heavy_check_mark: | | DQN | :x: | :x: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | diff --git a/docs/guide/algos.rst b/docs/guide/algos.rst index 7503ebd..474a047 100644 --- a/docs/guide/algos.rst +++ b/docs/guide/algos.rst @@ -8,6 +8,7 @@ along with some useful characteristics: support for discrete/continuous actions, =================== =========== ============ ================= =============== ================ Name ``Box`` ``Discrete`` ``MultiDiscrete`` ``MultiBinary`` Multi Processing =================== =========== ============ ================= =============== ================ +ARS [#f1]_ ✔️ ✔️ ❌ ❌ ✔️ A2C ✔️ ✔️ ✔️ ✔️ ✔️ DDPG ✔️ ❌ ❌ ❌ ✔️ DQN ❌ ✔️ ❌ ❌ ✔️ diff --git 
a/docs/guide/integrations.rst b/docs/guide/integrations.rst new file mode 100644 index 0000000..97d197f --- /dev/null +++ b/docs/guide/integrations.rst @@ -0,0 +1,49 @@ +.. _integrations: + +============ +Integrations +============ + +Weights & Biases +================ + +Weights & Biases provides a callback for experiment tracking that lets you visualize and share results. + +The full documentation is available here: https://docs.wandb.ai/guides/integrations/other/stable-baselines-3 + +.. code-block:: python + + import gym + import wandb + from wandb.integration.sb3 import WandbCallback + + from stable_baselines3 import PPO + + config = { + "policy_type": "MlpPolicy", + "total_timesteps": 25000, + "env_name": "CartPole-v1", + } + run = wandb.init( + project="sb3", + config=config, + sync_tensorboard=True, # auto-upload sb3's tensorboard metrics + # monitor_gym=True, # auto-upload the videos of agents playing the game + # save_code=True, # optional + ) + + model = PPO(config["policy_type"], config["env_name"], verbose=1, tensorboard_log=f"runs/{run.id}") + model.learn( + total_timesteps=config["total_timesteps"], + callback=WandbCallback( + model_save_path=f"models/{run.id}", + verbose=2, + ), + ) + run.finish() + + +Hugging Face +============ + +To be added. diff --git a/docs/guide/sb3_contrib.rst b/docs/guide/sb3_contrib.rst index 6d0ab13..1dfa912 100644 --- a/docs/guide/sb3_contrib.rst +++ b/docs/guide/sb3_contrib.rst @@ -8,7 +8,7 @@ We implement experimental features in a separate contrib repository: `SB3-Contrib`_ This allows Stable-Baselines3 (SB3) to maintain a stable and compact core, while still -providing the latest features, like Truncated Quantile Critics (TQC), Trust Region Policy Optimization (TRPO) or +providing the latest features, like Truncated Quantile Critics (TQC), Augmented Random Search (ARS), Trust Region Policy Optimization (TRPO) or Quantile Regression DQN (QR-DQN). Why create this repository? 
@@ -36,6 +36,7 @@ See documentation for the full list of included features. **RL Algorithms**: +- `Augmented Random Search (ARS) <https://arxiv.org/abs/1803.07055>`_ - `Quantile Regression DQN (QR-DQN)`_ - `Truncated Quantile Critics (TQC)`_ - `Trust Region Policy Optimization (TRPO) <https://arxiv.org/abs/1502.05477>`_ diff --git a/docs/index.rst b/docs/index.rst index 48746b7..b3ec4cc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,6 +48,7 @@ Main Features guide/custom_policy guide/callbacks guide/tensorboard + guide/integrations guide/rl_zoo guide/sb3_contrib guide/imitation diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index dfde978..7056ae1 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -23,6 +23,11 @@ New Features: - Added ``skip`` option to ``VecTransposeImage`` to skip transforming the channel order when the heuristic is wrong - Added ``copy()`` and ``combine()`` methods to ``RunningMeanStd`` +SB3-Contrib +^^^^^^^^^^^ +- Added Trust Region Policy Optimization (TRPO) (@cyprienc) +- Added Augmented Random Search (ARS) (@sgillen) + Bug Fixes: ^^^^^^^^^^ - Fixed a bug where ``set_env()`` with ``VecNormalize`` would result in an error with off-policy algorithms (thanks @cleversonahum) @@ -57,6 +62,7 @@ Documentation: - Updated SB3 Contrib doc - Fixed A2C and migration guide guidance on how to set epsilon with RMSpropTFLike (@thomasgubler) - Fixed custom policy documentation (@IperGiove) +- Added doc on Weights & Biases integration Release 1.3.0 (2021-10-23) ---------------------------