From f488d0772a0fde75bc5f27e6ae15bfb49d6c5352 Mon Sep 17 00:00:00 2001
From: Adam Gleave
Date: Thu, 3 Feb 2022 16:56:06 -0800
Subject: [PATCH] Autoformat code with black (new version complains about new things) (#757)

* Blacken code

* Fix GitLab CI: switch to Docker container with new black version
---
 .gitlab-ci.yml                                    | 2 +-
 stable_baselines3/common/distributions.py         | 4 ++--
 stable_baselines3/common/envs/bit_flipping_env.py | 8 ++++----
 tests/test_sde.py                                 | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 73f0134..63f9eaf 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,4 @@
-image: stablebaselines/stable-baselines3-cpu:1.3.1a3
+image: stablebaselines/stable-baselines3-cpu:1.4.1a0
 
 type-check:
   script:
diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py
index ca3f0b3..1c0e54a 100644
--- a/stable_baselines3/common/distributions.py
+++ b/stable_baselines3/common/distributions.py
@@ -222,7 +222,7 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
         log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
         # Squash correction (from original SAC implementation)
         # this comes from the fact that tanh is bijective and differentiable
-        log_prob -= th.sum(th.log(1 - actions ** 2 + self.epsilon), dim=1)
+        log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
         return log_prob
 
     def entropy(self) -> Optional[th.Tensor]:
@@ -531,7 +531,7 @@ class StateDependentNoiseDistribution(Distribution):
         """
         # Stop gradient if we don't want to influence the features
         self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
-        variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
+        variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
         self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
         return self
 
diff --git a/stable_baselines3/common/envs/bit_flipping_env.py b/stable_baselines3/common/envs/bit_flipping_env.py
index f5c2fb4..c5d713a 100644
--- a/stable_baselines3/common/envs/bit_flipping_env.py
+++ b/stable_baselines3/common/envs/bit_flipping_env.py
@@ -46,9 +46,9 @@ class BitFlippingEnv(GoalEnv):
             # representation of the observation
             self.observation_space = spaces.Dict(
                 {
-                    "observation": spaces.Discrete(2 ** n_bits),
-                    "achieved_goal": spaces.Discrete(2 ** n_bits),
-                    "desired_goal": spaces.Discrete(2 ** n_bits),
+                    "observation": spaces.Discrete(2**n_bits),
+                    "achieved_goal": spaces.Discrete(2**n_bits),
+                    "desired_goal": spaces.Discrete(2**n_bits),
                 }
             )
         elif image_obs_space:
@@ -115,7 +115,7 @@ class BitFlippingEnv(GoalEnv):
         if self.discrete_obs_space:
             # The internal state is the binary representation of the
             # observed one
-            return int(sum([state[i] * 2 ** i for i in range(len(state))]))
+            return int(sum([state[i] * 2**i for i in range(len(state))]))
 
         if self.image_obs_space:
             size = np.prod(self.image_shape)
diff --git a/tests/test_sde.py b/tests/test_sde.py
index e20b01d..17ac150 100644
--- a/tests/test_sde.py
+++ b/tests/test_sde.py
@@ -26,7 +26,7 @@ def test_state_dependent_exploration_grad():
 
     action = mu + noise
 
-    variance = th.mm(state ** 2, sigma_hat ** 2)
+    variance = th.mm(state**2, sigma_hat**2)
     action_dist = Normal(mu, th.sqrt(variance))
 
     # Sum over the action dimension because we assume they are independent
@@ -44,7 +44,7 @@ def test_state_dependent_exploration_grad():
         for i in range(state_dim):
             # Derivative of the log probability of the jth component of the action
             # w.r.t. the standard deviation sigma_j
-            d_log_policy_j = (noise[:, j] ** 2 - sigma_j ** 2) / sigma_j ** 3
+            d_log_policy_j = (noise[:, j] ** 2 - sigma_j**2) / sigma_j**3
             # Derivative of sigma_j w.r.t. sigma_hat_ij
             d_log_sigma_j = (state[:, i] ** 2 * sigma_hat[i, j]) / sigma_j
             # Chain rule, average over the minibatch