Fix replay_buffer_class type annotation (#1042)

* Fix replay_buffer_class type annotation

* Update changelog

* Fix the same type annotation issue in the remaining files

* Formatting

* Rolled back formatting changes for consistency
Juan Rocamonde 2022-09-02 05:10:01 +02:00 committed by GitHub
parent a7f30b04e3
commit fdca786f09
6 changed files with 6 additions and 5 deletions

docs/misc/changelog.rst

@@ -27,6 +27,7 @@ Bug Fixes:
 - Added multidimensional action space support (@qgallouedec)
 - Fixed missing verbose parameter passing in the ``EvalCallback`` constructor (@burakdmb)
 - Fixed the issue that when updating the target network in DQN, SAC, TD3, the ``running_mean`` and ``running_var`` properties of batch norm layers are not updated (@honglu2875)
+- Fixed incorrect type annotation of the ``replay_buffer_class`` argument in the ``common.OffPolicyAlgorithm`` initializer, where the annotation required an instance instead of a class (@Rocamonde)
 
 Deprecations:
 ^^^^^^^^^^^^^
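For context (editorial note, not part of the diff): the old annotation told type checkers that callers must pass a ReplayBuffer *instance*, while the algorithms actually expect the class itself and instantiate it internally. A minimal, self-contained sketch of the difference, using a stand-in class rather than the real stable_baselines3 buffer; init_old and init_new are hypothetical helpers for illustration:

```python
from typing import Optional, Type


class ReplayBuffer:
    """Stand-in for stable_baselines3.common.buffers.ReplayBuffer."""


def init_old(replay_buffer_class: Optional[ReplayBuffer] = None) -> None:
    """Pre-fix annotation: declares a buffer *instance*."""


def init_new(replay_buffer_class: Optional[Type[ReplayBuffer]] = None) -> None:
    """Post-fix annotation: declares the class object itself."""


# Both calls run, but only the second annotation matches the intent:
init_old(ReplayBuffer)  # mypy: incompatible type "Type[ReplayBuffer]"
init_new(ReplayBuffer)  # type-checks cleanly; the caller passes the class
```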

stable_baselines3/common/off_policy_algorithm.py

@@ -85,7 +85,7 @@ class OffPolicyAlgorithm(BaseAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = (1, "step"),
         gradient_steps: int = 1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         policy_kwargs: Optional[Dict[str, Any]] = None,
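As a usage sketch (editorial addition, assuming a standard Gym CartPole environment): the corrected signature matches how the argument is meant to be passed; the caller hands over the buffer class, and the algorithm constructs the buffer internally.

```python
from stable_baselines3 import DQN
from stable_baselines3.common.buffers import ReplayBuffer

# Pass the class itself, not ReplayBuffer(...); OffPolicyAlgorithm
# instantiates it with the buffer size, spaces, and device it manages.
model = DQN("MlpPolicy", "CartPole-v1", replay_buffer_class=ReplayBuffer)
model.learn(total_timesteps=1_000)
```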

stable_baselines3/ddpg/ddpg.py

@@ -66,7 +66,7 @@ class DDPG(TD3):
         train_freq: Union[int, Tuple[int, str]] = (1, "episode"),
         gradient_steps: int = -1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         tensorboard_log: Optional[str] = None,

stable_baselines3/dqn/dqn.py

@@ -78,7 +78,7 @@ class DQN(OffPolicyAlgorithm):
         gamma: float = 0.99,
         train_freq: Union[int, Tuple[int, str]] = 4,
         gradient_steps: int = 1,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         target_update_interval: int = 10000,

stable_baselines3/sac/sac.py

@@ -92,7 +92,7 @@ class SAC(OffPolicyAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = 1,
         gradient_steps: int = 1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         ent_coef: Union[str, float] = "auto",

stable_baselines3/td3/td3.py

@@ -80,7 +80,7 @@ class TD3(OffPolicyAlgorithm):
         train_freq: Union[int, Tuple[int, str]] = (1, "episode"),
         gradient_steps: int = -1,
         action_noise: Optional[ActionNoise] = None,
-        replay_buffer_class: Optional[ReplayBuffer] = None,
+        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
         replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
         optimize_memory_usage: bool = False,
         policy_delay: int = 2,