diff --git a/setup.py b/setup.py index 89a3b57..640112e 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ setup(name='torchy_baselines', license="MIT", long_description="", long_description_content_type='text/markdown', - version="0.0.4", + version="0.0.6a", ) # python setup.py sdist diff --git a/torchy_baselines/__init__.py b/torchy_baselines/__init__.py index a5896e6..b8383db 100644 --- a/torchy_baselines/__init__.py +++ b/torchy_baselines/__init__.py @@ -4,4 +4,4 @@ from torchy_baselines.ppo import PPO from torchy_baselines.sac import SAC from torchy_baselines.td3 import TD3 -__version__ = "0.0.5a" +__version__ = "0.0.6a" diff --git a/torchy_baselines/td3/policies.py b/torchy_baselines/td3/policies.py index 2a39773..19cff0f 100644 --- a/torchy_baselines/td3/policies.py +++ b/torchy_baselines/td3/policies.py @@ -7,7 +7,7 @@ from torchy_baselines.common.policies import BasePolicy, register_policy, create class Actor(BaseNetwork): def __init__(self, obs_dim, action_dim, net_arch, activation_fn=nn.ReLU, - use_sde=False, log_std_init=-2, clip_noise=0.1): + use_sde=False, log_std_init=-2, clip_noise=0.5): super(Actor, self).__init__() self.latent_pi, self.log_std = None, None @@ -67,7 +67,7 @@ class Critic(BaseNetwork): class TD3Policy(BasePolicy): def __init__(self, observation_space, action_space, learning_rate, net_arch=None, device='cpu', - activation_fn=nn.ReLU, use_sde=False, log_std_init=-2, clip_noise=0.1): + activation_fn=nn.ReLU, use_sde=False, log_std_init=-2, clip_noise=0.5): super(TD3Policy, self).__init__(observation_space, action_space, device) if net_arch is None: