mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-14 20:58:03 +00:00
Add policy documentation links to policy_kwargs parameter (#2050)
* docs: Add policy documentation links to policy_kwargs parameter
* Fix missing references, update changelog

---------

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
This commit is contained in:
parent
897d01d225
commit
9caa168686
9 changed files with 16 additions and 11 deletions
|
|
@ -38,6 +38,7 @@ Documentation:
|
|||
^^^^^^^^^^^^^^
|
||||
- Added Decisions and Dragons to resources. (@jmacglashan)
|
||||
- Updated PyBullet example, now compatible with Gymnasium
|
||||
- Added link to policies for ``policy_kwargs`` parameter (@kplers)
|
||||
|
||||
Release 2.4.0 (2024-11-18)
|
||||
--------------------------
|
||||
|
|
@ -1738,4 +1739,4 @@ And all the contributors:
|
|||
@DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
|
||||
@lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
|
||||
@marekm4 @stagoverflow @rushitnshah @markscsmith @NickLucche @cschindlbeck @peteole @jak3122 @will-maclean
|
||||
@brn-dev @jmacglashan
|
||||
@brn-dev @jmacglashan @kplers
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ Train a A2C agent on ``CartPole-v1`` using 4 environments.
|
|||
|
||||
A2C is meant to be run primarily on the CPU, especially when you are not using a CNN. To improve CPU utilization, try turning off the GPU and using ``SubprocVecEnv`` instead of the default ``DummyVecEnv``:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: python
|
||||
|
||||
from stable_baselines3 import A2C
|
||||
from stable_baselines3.common.env_util import make_vec_env
|
||||
|
|
@ -88,7 +88,7 @@ Train a A2C agent on ``CartPole-v1`` using 4 environments.
|
|||
env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
|
||||
model = A2C("MlpPolicy", env, device="cpu")
|
||||
model.learn(total_timesteps=25_000)
|
||||
|
||||
|
||||
For more information, see :ref:`Vectorized Environments <vec_env>`, `Issue #1245 <https://github.com/DLR-RM/stable-baselines3/issues/1245>`_ or the `Multiprocessing notebook <https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/multiprocessing_rl.ipynb>`_.
|
||||
|
||||
|
||||
|
|
@ -165,6 +165,8 @@ Parameters
|
|||
:inherited-members:
|
||||
|
||||
|
||||
.. _a2c_policies:
|
||||
|
||||
A2C Policies
|
||||
-------------
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ Train a PPO agent on ``CartPole-v1`` using 4 environments.
|
|||
|
||||
PPO is meant to be run primarily on the CPU, especially when you are not using a CNN. To improve CPU utilization, try turning off the GPU and using ``SubprocVecEnv`` instead of the default ``DummyVecEnv``:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: python
|
||||
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.env_util import make_vec_env
|
||||
|
|
@ -102,7 +102,7 @@ Train a PPO agent on ``CartPole-v1`` using 4 environments.
|
|||
env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
|
||||
model = PPO("MlpPolicy", env, device="cpu")
|
||||
model.learn(total_timesteps=25_000)
|
||||
|
||||
|
||||
For more information, see :ref:`Vectorized Environments <vec_env>`, `Issue #1245 <https://github.com/DLR-RM/stable-baselines3/issues/1245#issuecomment-1435766949>`_ or the `Multiprocessing notebook <https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/multiprocessing_rl.ipynb>`_.
|
||||
|
||||
Results
|
||||
|
|
@ -178,6 +178,8 @@ Parameters
|
|||
:inherited-members:
|
||||
|
||||
|
||||
.. _ppo_policies:
|
||||
|
||||
PPO Policies
|
||||
-------------
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class A2C(OnPolicyAlgorithm):
|
|||
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
|
||||
the reported success rate, mean episode length, and mean reward over
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`a2c_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class DDPG(TD3):
|
|||
:param optimize_memory_usage: Enable a memory efficient variant of the replay buffer
|
||||
at a cost of more complexity.
|
||||
See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`ddpg_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class DQN(OffPolicyAlgorithm):
|
|||
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
|
||||
the reported success rate, mean episode length, and mean reward over
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`dqn_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class PPO(OnPolicyAlgorithm):
|
|||
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
|
||||
the reported success rate, mean episode length, and mean reward over
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`ppo_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ class SAC(OffPolicyAlgorithm):
|
|||
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
|
||||
the reported success rate, mean episode length, and mean reward over
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`sac_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class TD3(OffPolicyAlgorithm):
|
|||
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
|
||||
the reported success rate, mean episode length, and mean reward over
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`td3_policies`
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
|
|
|
|||
Loading…
Reference in a new issue