2026-07-11 17:48:55 +00:00 · 2022-02-03 16:56:06 -08:00 · 2022-02-03 16:56:06 -08:00 · f488d0772a
commit f488d0772a
parent 54bcfa4544
4 changed files with 9 additions and 9 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,4 @@
-image: stablebaselines/stable-baselines3-cpu:1.3.1a3
+image: stablebaselines/stable-baselines3-cpu:1.4.1a0

 type-check:
  script:
--- a/stable_baselines3/common/distributions.py
+++ b/stable_baselines3/common/distributions.py
@@ -222,7 +222,7 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
        log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
        # Squash correction (from original SAC implementation)
        # this comes from the fact that tanh is bijective and differentiable
-        log_prob -= th.sum(th.log(1 - actions ** 2 + self.epsilon), dim=1)
+        log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
        return log_prob

    def entropy(self) -> Optional[th.Tensor]:
@@ -531,7 +531,7 @@ class StateDependentNoiseDistribution(Distribution):
        """
        # Stop gradient if we don't want to influence the features
        self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
-        variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
+        variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
        self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
        return self

--- a/stable_baselines3/common/envs/bit_flipping_env.py
+++ b/stable_baselines3/common/envs/bit_flipping_env.py
@@ -46,9 +46,9 @@ class BitFlippingEnv(GoalEnv):
            # representation of the observation
            self.observation_space = spaces.Dict(
                {
-                    "observation": spaces.Discrete(2 ** n_bits),
-                    "achieved_goal": spaces.Discrete(2 ** n_bits),
-                    "desired_goal": spaces.Discrete(2 ** n_bits),
+                    "observation": spaces.Discrete(2**n_bits),
+                    "achieved_goal": spaces.Discrete(2**n_bits),
+                    "desired_goal": spaces.Discrete(2**n_bits),
                }
            )
        elif image_obs_space:
@@ -115,7 +115,7 @@ class BitFlippingEnv(GoalEnv):
        if self.discrete_obs_space:
            # The internal state is the binary representation of the
            # observed one
-            return int(sum([state[i] * 2 ** i for i in range(len(state))]))
+            return int(sum([state[i] * 2**i for i in range(len(state))]))

        if self.image_obs_space:
            size = np.prod(self.image_shape)
--- a/tests/test_sde.py
+++ b/tests/test_sde.py
@@ -26,7 +26,7 @@ def test_state_dependent_exploration_grad():

    action = mu + noise

-    variance = th.mm(state ** 2, sigma_hat ** 2)
+    variance = th.mm(state**2, sigma_hat**2)
    action_dist = Normal(mu, th.sqrt(variance))

    # Sum over the action dimension because we assume they are independent
@@ -44,7 +44,7 @@ def test_state_dependent_exploration_grad():
        for i in range(state_dim):
            # Derivative of the log probability of the jth component of the action
            # w.r.t. the standard deviation sigma_j
-            d_log_policy_j = (noise[:, j] ** 2 - sigma_j ** 2) / sigma_j ** 3
+            d_log_policy_j = (noise[:, j] ** 2 - sigma_j**2) / sigma_j**3
            # Derivative of sigma_j w.r.t. sigma_hat_ij
            d_log_sigma_j = (state[:, i] ** 2 * sigma_hat[i, j]) / sigma_j
            # Chain rule, average over the minibatch