From f488d0772a0fde75bc5f27e6ae15bfb49d6c5352 Mon Sep 17 00:00:00 2001
From: Adam Gleave <adam@gleave.me>
Date: Thu, 3 Feb 2022 16:56:06 -0800
Subject: [PATCH] Autoformat code with black (new version removes spaces around
 ** when both operands are simple) (#757)

* Blacken code

* Fix GitLab CI: switch to Docker image with new black version
---
 .gitlab-ci.yml                                    | 2 +-
 stable_baselines3/common/distributions.py         | 4 ++--
 stable_baselines3/common/envs/bit_flipping_env.py | 8 ++++----
 tests/test_sde.py                                 | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 73f0134..63f9eaf 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,4 @@
-image: stablebaselines/stable-baselines3-cpu:1.3.1a3
+image: stablebaselines/stable-baselines3-cpu:1.4.1a0
 
 type-check:
   script:
diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py
index ca3f0b3..1c0e54a 100644
--- a/stable_baselines3/common/distributions.py
+++ b/stable_baselines3/common/distributions.py
@@ -222,7 +222,7 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
         log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
         # Squash correction (from original SAC implementation)
         # this comes from the fact that tanh is bijective and differentiable
-        log_prob -= th.sum(th.log(1 - actions ** 2 + self.epsilon), dim=1)
+        log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
         return log_prob
 
     def entropy(self) -> Optional[th.Tensor]:
@@ -531,7 +531,7 @@ class StateDependentNoiseDistribution(Distribution):
         """
         # Stop gradient if we don't want to influence the features
         self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
-        variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
+        variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
         self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
         return self
 
diff --git a/stable_baselines3/common/envs/bit_flipping_env.py b/stable_baselines3/common/envs/bit_flipping_env.py
index f5c2fb4..c5d713a 100644
--- a/stable_baselines3/common/envs/bit_flipping_env.py
+++ b/stable_baselines3/common/envs/bit_flipping_env.py
@@ -46,9 +46,9 @@ class BitFlippingEnv(GoalEnv):
             # representation of the observation
             self.observation_space = spaces.Dict(
                 {
-                    "observation": spaces.Discrete(2 ** n_bits),
-                    "achieved_goal": spaces.Discrete(2 ** n_bits),
-                    "desired_goal": spaces.Discrete(2 ** n_bits),
+                    "observation": spaces.Discrete(2**n_bits),
+                    "achieved_goal": spaces.Discrete(2**n_bits),
+                    "desired_goal": spaces.Discrete(2**n_bits),
                 }
             )
         elif image_obs_space:
@@ -115,7 +115,7 @@ class BitFlippingEnv(GoalEnv):
         if self.discrete_obs_space:
             # The internal state is the binary representation of the
             # observed one
-            return int(sum([state[i] * 2 ** i for i in range(len(state))]))
+            return int(sum([state[i] * 2**i for i in range(len(state))]))
 
         if self.image_obs_space:
             size = np.prod(self.image_shape)
diff --git a/tests/test_sde.py b/tests/test_sde.py
index e20b01d..17ac150 100644
--- a/tests/test_sde.py
+++ b/tests/test_sde.py
@@ -26,7 +26,7 @@ def test_state_dependent_exploration_grad():
 
     action = mu + noise
 
-    variance = th.mm(state ** 2, sigma_hat ** 2)
+    variance = th.mm(state**2, sigma_hat**2)
     action_dist = Normal(mu, th.sqrt(variance))
 
     # Sum over the action dimension because we assume they are independent
@@ -44,7 +44,7 @@ def test_state_dependent_exploration_grad():
         for i in range(state_dim):
             # Derivative of the log probability of the jth component of the action
             # w.r.t. the standard deviation sigma_j
-            d_log_policy_j = (noise[:, j] ** 2 - sigma_j ** 2) / sigma_j ** 3
+            d_log_policy_j = (noise[:, j] ** 2 - sigma_j**2) / sigma_j**3
             # Derivative of sigma_j w.r.t. sigma_hat_ij
             d_log_sigma_j = (state[:, i] ** 2 * sigma_hat[i, j]) / sigma_j
             # Chain rule, average over the minibatch