mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-19 21:40:19 +00:00
Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination (#948)
* Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination
* Modify test to avoid unsupported buffer configuration
* Change from assertion to raising of ValueError
* Update changelog
* Update style for consistency
* Use handle_timeout_termination when possible

Co-authored-by: Anssi <kaneran21@hotmail.com>
Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
This commit is contained in:
parent
d64bcb401a
commit
ef10189d80
3 changed files with 12 additions and 1 deletions
|
|
@ -33,6 +33,7 @@ Bug Fixes:
|
|||
- Added a check for unbounded actions
|
||||
- Fixed issues due to newer version of protobuf (tensorboard) and sphinx
|
||||
- Fix exception causes all over the codebase (@cool-RR)
|
||||
- Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination due to a bug (@MWeltevrede)
|
||||
|
||||
Deprecations:
|
||||
^^^^^^^^^^^^^
|
||||
|
|
@ -979,4 +980,4 @@ And all the contributors:
|
|||
@wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum
|
||||
@eleurent @ac-93 @cove9988 @theDebugger811 @hsuehch @Demetrio92 @thomasgubler @IperGiove @ScheiklP
|
||||
@simoninithomas @armandpl @manuel-delverme @Gautam-J @gianlucadecola @buoyancy99 @caburu @xy9485
|
||||
@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR
|
||||
@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR @MWeltevrede
|
||||
|
|
|
|||
|
|
@ -164,6 +164,7 @@ class ReplayBuffer(BaseBuffer):
|
|||
at a cost of more complexity.
|
||||
See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
|
||||
and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274
|
||||
Cannot be used in combination with handle_timeout_termination.
|
||||
:param handle_timeout_termination: Handle timeout termination (due to timelimit)
|
||||
separately and treat the task as infinite horizon task.
|
||||
https://github.com/DLR-RM/stable-baselines3/issues/284
|
||||
|
|
@ -188,6 +189,12 @@ class ReplayBuffer(BaseBuffer):
|
|||
if psutil is not None:
|
||||
mem_available = psutil.virtual_memory().available
|
||||
|
||||
# there is a bug if both optimize_memory_usage and handle_timeout_termination are true
|
||||
# see https://github.com/DLR-RM/stable-baselines3/issues/934
|
||||
if optimize_memory_usage and handle_timeout_termination:
|
||||
raise ValueError(
|
||||
"ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously."
|
||||
)
|
||||
self.optimize_memory_usage = optimize_memory_usage
|
||||
|
||||
self.observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=observation_space.dtype)
|
||||
|
|
|
|||
|
|
@ -375,6 +375,9 @@ def test_warn_buffer(recwarn, model_class, optimize_memory_usage):
|
|||
select_env(model_class),
|
||||
buffer_size=100,
|
||||
optimize_memory_usage=optimize_memory_usage,
|
||||
# we cannot use optimize_memory_usage and handle_timeout_termination
|
||||
# at the same time
|
||||
replay_buffer_kwargs={"handle_timeout_termination": not optimize_memory_usage},
|
||||
policy_kwargs=dict(net_arch=[64]),
|
||||
learning_starts=10,
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in a new issue