mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-26 22:45:15 +00:00
Autoformat code with black (new version complains about new things) (#757)
* Blacken code
* Fix GitLab CI: switch to Docker container with new black version
This commit is contained in:
parent
54bcfa4544
commit
f488d0772a
4 changed files with 9 additions and 9 deletions
|
|
@@ -1,4 +1,4 @@
|
|||
image: stablebaselines/stable-baselines3-cpu:1.3.1a3
|
||||
image: stablebaselines/stable-baselines3-cpu:1.4.1a0
|
||||
|
||||
type-check:
|
||||
script:
|
||||
|
|
|
|||
|
|
@@ -222,7 +222,7 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
|
|||
log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
|
||||
# Squash correction (from original SAC implementation)
|
||||
# this comes from the fact that tanh is bijective and differentiable
|
||||
log_prob -= th.sum(th.log(1 - actions ** 2 + self.epsilon), dim=1)
|
||||
log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
|
||||
return log_prob
|
||||
|
||||
def entropy(self) -> Optional[th.Tensor]:
|
||||
|
|
@@ -531,7 +531,7 @@ class StateDependentNoiseDistribution(Distribution):
|
|||
"""
|
||||
# Stop gradient if we don't want to influence the features
|
||||
self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
|
||||
variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
|
||||
variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
|
||||
self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
|
||||
return self
|
||||
|
||||
|
|
|
|||
|
|
@@ -46,9 +46,9 @@ class BitFlippingEnv(GoalEnv):
|
|||
# representation of the observation
|
||||
self.observation_space = spaces.Dict(
|
||||
{
|
||||
"observation": spaces.Discrete(2 ** n_bits),
|
||||
"achieved_goal": spaces.Discrete(2 ** n_bits),
|
||||
"desired_goal": spaces.Discrete(2 ** n_bits),
|
||||
"observation": spaces.Discrete(2**n_bits),
|
||||
"achieved_goal": spaces.Discrete(2**n_bits),
|
||||
"desired_goal": spaces.Discrete(2**n_bits),
|
||||
}
|
||||
)
|
||||
elif image_obs_space:
|
||||
|
|
@@ -115,7 +115,7 @@ class BitFlippingEnv(GoalEnv):
|
|||
if self.discrete_obs_space:
|
||||
# The internal state is the binary representation of the
|
||||
# observed one
|
||||
return int(sum([state[i] * 2 ** i for i in range(len(state))]))
|
||||
return int(sum([state[i] * 2**i for i in range(len(state))]))
|
||||
|
||||
if self.image_obs_space:
|
||||
size = np.prod(self.image_shape)
|
||||
|
|
|
|||
|
|
@@ -26,7 +26,7 @@ def test_state_dependent_exploration_grad():
|
|||
|
||||
action = mu + noise
|
||||
|
||||
variance = th.mm(state ** 2, sigma_hat ** 2)
|
||||
variance = th.mm(state**2, sigma_hat**2)
|
||||
action_dist = Normal(mu, th.sqrt(variance))
|
||||
|
||||
# Sum over the action dimension because we assume they are independent
|
||||
|
|
@@ -44,7 +44,7 @@ def test_state_dependent_exploration_grad():
|
|||
for i in range(state_dim):
|
||||
# Derivative of the log probability of the jth component of the action
|
||||
# w.r.t. the standard deviation sigma_j
|
||||
d_log_policy_j = (noise[:, j] ** 2 - sigma_j ** 2) / sigma_j ** 3
|
||||
d_log_policy_j = (noise[:, j] ** 2 - sigma_j**2) / sigma_j**3
|
||||
# Derivative of sigma_j w.r.t. sigma_hat_ij
|
||||
d_log_sigma_j = (state[:, i] ** 2 * sigma_hat[i, j]) / sigma_j
|
||||
# Chain rule, average over the minibatch
|
||||
|
|
|
|||
Loading…
Reference in a new issue