Mirror of https://github.com/saymrwulf/stable-baselines3.git, synced 2026-05-14 20:58:03 +00:00
Fix replay_buffer_class type annotation (#1042)
* Fix replay_buffer_class type annotation
* Update changelog
* Further replacement of same type annotation issue
* Formatting
* Rolled back formatting changes for consistency
This commit is contained in:
parent
a7f30b04e3
commit
fdca786f09
6 changed files with 6 additions and 5 deletions
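Context for the diffs below: SB3's off-policy algorithms receive a buffer *class* through ``replay_buffer_class`` and instantiate it themselves, so the parameter's type must be ``Type[ReplayBuffer]``, not ``ReplayBuffer``. A minimal self-contained sketch of that pattern, assuming a toy ``ReplayBuffer`` and a hypothetical ``setup_buffer`` helper (neither is SB3's actual internal code):

```python
from typing import Any, Dict, Optional, Type


class ReplayBuffer:
    """Stand-in for stable_baselines3.common.buffers.ReplayBuffer."""

    def __init__(self, buffer_size: int, **kwargs: Any) -> None:
        self.buffer_size = buffer_size


def setup_buffer(
    replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
    replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
) -> ReplayBuffer:
    # The argument is a class that gets *called* to build the buffer,
    # which is why Optional[ReplayBuffer] (an instance) was the wrong
    # annotation: an already-built buffer is not a constructor.
    buffer_cls = replay_buffer_class if replay_buffer_class is not None else ReplayBuffer
    return buffer_cls(buffer_size=1_000_000, **(replay_buffer_kwargs or {}))


buffer = setup_buffer()  # default class, instantiated internally
```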
docs/misc/changelog.rst
@@ -27,6 +27,7 @@ Bug Fixes:
 - Added multidimensional action space support (@qgallouedec)
 - Fixed missing verbose parameter passing in the ``EvalCallback`` constructor (@burakdmb)
 - Fixed the issue that when updating the target network in DQN, SAC, TD3, the ``running_mean`` and ``running_var`` properties of batch norm layers are not updated (@honglu2875)
+- Fixed incorrect type annotation of the replay_buffer_class argument in ``common.OffPolicyAlgorithm`` initializer, where an instance instead of a class was required (@Rocamonde)
 
 Deprecations:
 ^^^^^^^^^^^^^
stable_baselines3/common/off_policy_algorithm.py
@@ -85,7 +85,7 @@ class OffPolicyAlgorithm(BaseAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = (1, "step"),
         gradient_steps: int = 1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         policy_kwargs: Optional[Dict[str, Any]] = None,
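With the corrected signature, the intended call pattern reads cleanly: pass the class, let the algorithm construct it. A usage sketch against the public SB3 API; the policy and environment ("MlpPolicy", "CartPole-v1") are chosen only for illustration:

```python
from stable_baselines3 import DQN
from stable_baselines3.common.buffers import ReplayBuffer

# Pass the buffer *class*, not an instance; the algorithm builds it
# internally and forwards replay_buffer_kwargs to its constructor.
model = DQN(
    "MlpPolicy",
    "CartPole-v1",
    replay_buffer_class=ReplayBuffer,
)
```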
stable_baselines3/ddpg/ddpg.py
@@ -66,7 +66,7 @@ class DDPG(TD3):
         train_freq: Union[int, Tuple[int, str]] = (1, "episode"),
         gradient_steps: int = -1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         tensorboard_log: Optional[str] = None,
stable_baselines3/dqn/dqn.py
@@ -78,7 +78,7 @@ class DQN(OffPolicyAlgorithm):
         gamma: float = 0.99,
         train_freq: Union[int, Tuple[int, str]] = 4,
         gradient_steps: int = 1,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         target_update_interval: int = 10000,
stable_baselines3/sac/sac.py
@@ -92,7 +92,7 @@ class SAC(OffPolicyAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = 1,
         gradient_steps: int = 1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         ent_coef: Union[str, float] = "auto",
stable_baselines3/td3/td3.py
@@ -80,7 +80,7 @@ class TD3(OffPolicyAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = (1, "episode"),
         gradient_steps: int = -1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         policy_delay: int = 2,
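Each hunk above makes the same one-line change. In isolation, the semantic difference the annotation encodes looks like this (a sketch, assuming a type checker reads the hints literally):

```python
from typing import Optional, Type

from stable_baselines3.common.buffers import ReplayBuffer

# Old (incorrect): declares the argument to be an already-built buffer object.
buffer_instance: Optional[ReplayBuffer] = None

# New (correct): declares it to be a buffer class to instantiate later,
# which matches how every algorithm above actually uses the argument.
buffer_class: Optional[Type[ReplayBuffer]] = ReplayBuffer
```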