mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-31 23:28:05 +00:00
Fix set_env when using VecNormalize (#638)
* Fix `set_env` when using `VecNormalize`
* Update version
This commit is contained in:
parent
6daf82bf74
commit
2bb4500948
4 changed files with 19 additions and 3 deletions
|
|
@ -4,7 +4,7 @@ Changelog
|
|||
==========
|
||||
|
||||
|
||||
Release 1.3.1a0 (WIP)
|
||||
Release 1.3.1a1 (WIP)
|
||||
---------------------------
|
||||
|
||||
Breaking Changes:
|
||||
|
|
@ -16,8 +16,10 @@ New Features:
|
|||
|
||||
Bug Fixes:
|
||||
^^^^^^^^^^
|
||||
- Fixed a bug where ``set_env()`` with ``VecNormalize`` would result in an error with off-policy algorithms (thanks @cleversonahum)
|
||||
- FPS calculation is now based on the number of steps performed during the last ``learn`` call, even when ``reset_num_timesteps`` is set to ``False`` (@kachayev)
|
||||
|
||||
|
||||
Deprecations:
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
|
|
@ -830,4 +832,7 @@ And all the contributors:
|
|||
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
|
||||
@diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
|
||||
@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
|
||||
@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @eleurent @ac-93
|
||||
@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan
|
||||
@benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc
|
||||
@wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum
|
||||
@eleurent @ac-93
|
||||
|
|
|
|||
|
|
@ -499,6 +499,10 @@ class BaseAlgorithm(ABC):
|
|||
env = self._wrap_env(env, self.verbose)
|
||||
# Check that the observation and action spaces match
|
||||
check_for_correct_spaces(env, self.observation_space, self.action_space)
|
||||
# Update VecNormalize object
|
||||
# otherwise the wrong env may be used, see https://github.com/DLR-RM/stable-baselines3/issues/637
|
||||
self._vec_normalize_env = unwrap_vec_normalize(env)
|
||||
|
||||
# Discard `_last_obs`, this will force the env to reset before training
|
||||
# See issue https://github.com/DLR-RM/stable-baselines3/issues/597
|
||||
if force_reset:
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
1.3.1a0
|
||||
1.3.1a1
|
||||
|
|
|
|||
|
|
@ -341,6 +341,13 @@ def test_offpolicy_normalization(model_class, online_sampling):
|
|||
else:
|
||||
model = model_class("MlpPolicy", env, verbose=1, learning_starts=100, policy_kwargs=dict(net_arch=[64]))
|
||||
|
||||
# Check that VecNormalize object is correctly updated
|
||||
assert model.get_vec_normalize_env() is env
|
||||
model.set_env(eval_env)
|
||||
assert model.get_vec_normalize_env() is eval_env
|
||||
model.learn(total_timesteps=10)
|
||||
model.set_env(env)
|
||||
|
||||
model.learn(total_timesteps=150, eval_env=eval_env, eval_freq=75)
|
||||
# Check getter
|
||||
assert isinstance(model.get_vec_normalize_env(), VecNormalize)
|
||||
|
|
|
|||
Loading…
Reference in a new issue