mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-18 21:30:19 +00:00
Fix VectorizedActionNoise in OffPolicyAlgorithm (#1657)
* moves VectorizedActionNoise into _setup_learn()
* update changelog

---------

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
parent
5c93e9f426
commit
c99d65c664
2 changed files with 10 additions and 5 deletions
|
|
@ -20,7 +20,8 @@ New Features:
|
|||
|
||||
Bug Fixes:
|
||||
^^^^^^^^^^
|
||||
- Prevents OOB error on Windows if no seed is passed (@PatrickHelm)
|
||||
- Moves VectorizedActionNoise into ``_setup_learn()`` in OffPolicyAlgorithm (@PatrickHelm)
|
||||
- Prevents out of bound error on Windows if no seed is passed (@PatrickHelm)
|
||||
|
||||
Deprecations:
|
||||
^^^^^^^^^^^^^
|
||||
|
|
|
|||
|
|
@ -281,6 +281,14 @@ class OffPolicyAlgorithm(BaseAlgorithm):
|
|||
pos = (replay_buffer.pos - 1) % replay_buffer.buffer_size
|
||||
replay_buffer.dones[pos] = True
|
||||
|
||||
# Vectorize action noise if needed
|
||||
if (
|
||||
self.action_noise is not None
|
||||
and self.env.num_envs > 1
|
||||
and not isinstance(self.action_noise, VectorizedActionNoise)
|
||||
):
|
||||
self.action_noise = VectorizedActionNoise(self.action_noise, self.env.num_envs)
|
||||
|
||||
return super()._setup_learn(
|
||||
total_timesteps,
|
||||
callback,
|
||||
|
|
@ -523,10 +531,6 @@ class OffPolicyAlgorithm(BaseAlgorithm):
|
|||
if env.num_envs > 1:
|
||||
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
|
||||
|
||||
# Vectorize action noise if needed
|
||||
if action_noise is not None and env.num_envs > 1 and not isinstance(action_noise, VectorizedActionNoise):
|
||||
action_noise = VectorizedActionNoise(action_noise, env.num_envs)
|
||||
|
||||
if self.use_sde:
|
||||
self.actor.reset_noise(env.num_envs)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue