Fix VectorizedActionNoise in OffPolicyAlgorithm (#1657)

* moves VectorizedActionNoise into _setup_learn()

* update changelog

---------

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
PatrickHelm 2023-08-30 12:37:14 +02:00 committed by GitHub
parent 5c93e9f426
commit c99d65c664
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 5 deletions

View file

@ -20,7 +20,8 @@ New Features:
Bug Fixes:
^^^^^^^^^^
- Prevents OOB error on Windows if no seed is passed (@PatrickHelm)
- Moves the ``VectorizedActionNoise`` wrapping of the action noise into ``_setup_learn()`` in ``OffPolicyAlgorithm`` (@PatrickHelm)
- Prevents out-of-bounds error on Windows if no seed is passed (@PatrickHelm)
Deprecations:
^^^^^^^^^^^^^

View file

@ -281,6 +281,14 @@ class OffPolicyAlgorithm(BaseAlgorithm):
pos = (replay_buffer.pos - 1) % replay_buffer.buffer_size
replay_buffer.dones[pos] = True
# Vectorize action noise if needed
if (
self.action_noise is not None
and self.env.num_envs > 1
and not isinstance(self.action_noise, VectorizedActionNoise)
):
self.action_noise = VectorizedActionNoise(self.action_noise, self.env.num_envs)
return super()._setup_learn(
total_timesteps,
callback,
@ -523,10 +531,6 @@ class OffPolicyAlgorithm(BaseAlgorithm):
if env.num_envs > 1:
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
# Vectorize action noise if needed
if action_noise is not None and env.num_envs > 1 and not isinstance(action_noise, VectorizedActionNoise):
action_noise = VectorizedActionNoise(action_noise, env.num_envs)
if self.use_sde:
self.actor.reset_noise(env.num_envs)