Autoformat code with black (new version complains about new things) (#757)

* Blacken code

* Fix GitLab CI: switch to Docker container with new black version
This commit is contained in:
Adam Gleave 2022-02-03 16:56:06 -08:00 committed by GitHub
parent 54bcfa4544
commit f488d0772a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 9 additions and 9 deletions

View file

@@ -1,4 +1,4 @@
image: stablebaselines/stable-baselines3-cpu:1.3.1a3
image: stablebaselines/stable-baselines3-cpu:1.4.1a0
type-check:
script:

View file

@@ -222,7 +222,7 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
# Squash correction (from original SAC implementation)
# this comes from the fact that tanh is bijective and differentiable
log_prob -= th.sum(th.log(1 - actions ** 2 + self.epsilon), dim=1)
log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
return log_prob
def entropy(self) -> Optional[th.Tensor]:
@@ -531,7 +531,7 @@ class StateDependentNoiseDistribution(Distribution):
"""
# Stop gradient if we don't want to influence the features
self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
return self

View file

@@ -46,9 +46,9 @@ class BitFlippingEnv(GoalEnv):
# representation of the observation
self.observation_space = spaces.Dict(
{
"observation": spaces.Discrete(2 ** n_bits),
"achieved_goal": spaces.Discrete(2 ** n_bits),
"desired_goal": spaces.Discrete(2 ** n_bits),
"observation": spaces.Discrete(2**n_bits),
"achieved_goal": spaces.Discrete(2**n_bits),
"desired_goal": spaces.Discrete(2**n_bits),
}
)
elif image_obs_space:
@@ -115,7 +115,7 @@ class BitFlippingEnv(GoalEnv):
if self.discrete_obs_space:
# The internal state is the binary representation of the
# observed one
return int(sum([state[i] * 2 ** i for i in range(len(state))]))
return int(sum([state[i] * 2**i for i in range(len(state))]))
if self.image_obs_space:
size = np.prod(self.image_shape)

View file

@@ -26,7 +26,7 @@ def test_state_dependent_exploration_grad():
action = mu + noise
variance = th.mm(state ** 2, sigma_hat ** 2)
variance = th.mm(state**2, sigma_hat**2)
action_dist = Normal(mu, th.sqrt(variance))
# Sum over the action dimension because we assume they are independent
@@ -44,7 +44,7 @@ def test_state_dependent_exploration_grad():
for i in range(state_dim):
# Derivative of the log probability of the jth component of the action
# w.r.t. the standard deviation sigma_j
d_log_policy_j = (noise[:, j] ** 2 - sigma_j ** 2) / sigma_j ** 3
d_log_policy_j = (noise[:, j] ** 2 - sigma_j**2) / sigma_j**3
# Derivative of sigma_j w.r.t. sigma_hat_ij
d_log_sigma_j = (state[:, i] ** 2 * sigma_hat[i, j]) / sigma_j
# Chain rule, average over the minibatch