mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-07-03 03:59:13 +00:00
Docs: Env attributes should be modified using env setters (#1789)
* add: paragraph on how to modify vec envs attributes via setters (solves DLR-RM#1573) * Update vec env doc * Update callback doc and SB3 version * Fix indentation --------- Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
parent
373166d6ac
commit
a653aec10d
4 changed files with 127 additions and 9 deletions
|
|
@ -29,24 +29,25 @@ You can find two examples of custom callbacks in the documentation: one for savi
|
|||
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
def __init__(self, verbose=0):
|
||||
def __init__(self, verbose: int = 0):
|
||||
super().__init__(verbose)
|
||||
# Those variables will be accessible in the callback
|
||||
# (they are defined in the base class)
|
||||
# The RL model
|
||||
# self.model = None # type: BaseAlgorithm
|
||||
# An alias for self.model.get_env(), the environment used for training
|
||||
# self.training_env = None # type: Union[gym.Env, VecEnv, None]
|
||||
# self.training_env # type: VecEnv
|
||||
# Number of times the callback was called
|
||||
# self.n_calls = 0 # type: int
|
||||
# num_timesteps = n_envs * n times env.step() was called
|
||||
# self.num_timesteps = 0 # type: int
|
||||
# local and global variables
|
||||
# self.locals = None # type: Dict[str, Any]
|
||||
# self.globals = None # type: Dict[str, Any]
|
||||
# self.locals = {} # type: Dict[str, Any]
|
||||
# self.globals = {} # type: Dict[str, Any]
|
||||
# The logger object, used to report things in the terminal
|
||||
# self.logger = None # stable_baselines3.common.logger
|
||||
# # Sometimes, for event callback, it is useful
|
||||
# # to have access to the parent object
|
||||
# self.logger # type: stable_baselines3.common.logger.Logger
|
||||
# Sometimes, for event callback, it is useful
|
||||
# to have access to the parent object
|
||||
# self.parent = None # type: Optional[BaseCallback]
|
||||
|
||||
def _on_training_start(self) -> None:
|
||||
|
|
|
|||
|
|
@ -96,6 +96,90 @@ SB3 VecEnv API is actually close to Gym 0.21 API but differs to Gym 0.26+ API:
|
|||
``vec_env.env_method("method_name", args1, args2, kwargs1=kwargs1)`` and ``vec_env.set_attr("attribute_name", new_value)``.
|
||||
|
||||
|
||||
Modifying Vectorized Environments Attributes
|
||||
--------------------------------------------
|
||||
|
||||
If you plan to `modify the attributes of an environment <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ while it is used (e.g., modifying an attribute specifying the task carried out for a portion of training when doing multi-task learning, or
|
||||
a parameter of the environment dynamics), you must expose a setter method.
|
||||
In fact, directly accessing the environment attribute in the callback can lead to unexpected behavior because environments can be wrapped (using gym or VecEnv wrappers, the ``Monitor`` wrapper being one example).
|
||||
|
||||
Consider the following example for a custom env:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
|
||||
from stable_baselines3.common.env_util import make_vec_env
|
||||
|
||||
|
||||
class MyMultiTaskEnv(gym.Env):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
"""
|
||||
A state and action space for robotic locomotion.
|
||||
The multi-task twist is that the policy would need to adapt to different terrains, each with its own
|
||||
friction coefficient, mu.
|
||||
The friction coefficient is the only parameter that changes between tasks.
|
||||
mu is a scalar between 0 and 1, and during training a callback is used to update mu.
|
||||
"""
|
||||
...
|
||||
|
||||
def step(self, action):
|
||||
# Do something, depending on the action and current value of mu the next state is computed
|
||||
return self._get_obs(), reward, done, truncated, info
|
||||
|
||||
def set_mu(self, new_mu: float) -> None:
|
||||
# Note: this value should be used only at the next reset
|
||||
self.mu = new_mu
|
||||
|
||||
# Example of wrapped env
|
||||
# env is of type <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
|
||||
env = gym.make("CartPole-v1")
|
||||
# To access the base env, without wrapper, you should use `.unwrapped`
|
||||
# or env.get_wrapper_attr("gravity") to include wrappers
|
||||
env.unwrapped.gravity
|
||||
# SB3 uses VecEnv for training, where `env.unwrapped.x = new_value` cannot be used to set an attribute
|
||||
# therefore, you should expose a setter like `set_mu` to properly set an attribute
|
||||
vec_env = make_vec_env(MyMultiTaskEnv)
|
||||
# Print current mu value
|
||||
# Note: you should use vec_env.env_method("get_wrapper_attr", "mu") in Gymnasium v1.0
|
||||
print(vec_env.env_method("get_wrapper_attr", "mu"))
|
||||
# Change `mu` attribute via the setter
|
||||
vec_env.env_method("set_mu", 0.1)
|
||||
|
||||
|
||||
In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).
|
||||
Instead, the callback should use the ``set_mu`` method via the ``env_method`` method for Vectorized Environments.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from itertools import cycle
|
||||
|
||||
class ChangeMuCallback(BaseCallback):
|
||||
"""
|
||||
This callback changes the value of mu during training, looping
|
||||
through a list of values until training is aborted.
|
||||
The environment is implemented so that the impact of changing
|
||||
the value of mu mid-episode is visible only after the episode is over
|
||||
and the reset method has been called.
|
||||
"""
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
# An iterator that contains the different values of the friction coefficient
|
||||
self.mus = cycle([0.1, 0.2, 0.5, 0.13, 0.9])
|
||||
|
||||
def _on_step(self):
|
||||
# Note: in practice, you should not change this value at every step
|
||||
# but rather depending on some events/metrics like agent performance/episode termination
|
||||
# both accessible via the `self.logger` or `self.locals` variables
|
||||
self.training_env.env_method("set_mu", next(self.mus))
# Returning True keeps training running (a falsy return value aborts training)
return True
|
||||
|
||||
This callback can then be used to safely modify environment attributes during training since
|
||||
it calls the environment setter method.
|
||||
|
||||
|
||||
Vectorized Environments Wrappers
|
||||
--------------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,39 @@
|
|||
Changelog
|
||||
==========
|
||||
|
||||
|
||||
Release 2.3.0a0 (WIP)
|
||||
--------------------------
|
||||
|
||||
Breaking Changes:
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
New Features:
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Bug Fixes:
|
||||
^^^^^^^^^^
|
||||
|
||||
`SB3-Contrib`_
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
`RL Zoo`_
|
||||
^^^^^^^^^
|
||||
|
||||
`SBX`_ (SB3 + Jax)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Deprecations:
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Others:
|
||||
^^^^^^^
|
||||
|
||||
Documentation:
|
||||
^^^^^^^^^^^^^^
|
||||
- Added a paragraph on modifying vectorized environment parameters via setters (@fracapuano)
|
||||
- Updated callback code example
|
||||
|
||||
Release 2.2.1 (2023-11-17)
|
||||
--------------------------
|
||||
**Support for options at reset, bug fixes and better error messages**
|
||||
|
|
@ -1490,7 +1523,7 @@ And all the contributors:
|
|||
@flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
|
||||
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
|
||||
@diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
|
||||
@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
|
||||
@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @fracapuano @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
|
||||
@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan
|
||||
@benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc
|
||||
@wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
2.2.1
|
||||
2.3.0a0
|
||||
|
|
|
|||
Loading…
Reference in a new issue