mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-06-29 03:31:08 +00:00
Merge branch 'master' into sde
This commit is contained in:
commit
151104c07d
3 changed files with 4 additions and 3 deletions
|
|
@ -32,6 +32,7 @@ Bug Fixes:
|
|||
- Fix target for updating q values in SAC: the entropy term was not conditioned by terminal states
|
||||
- Use ``cloudpickle.load`` instead of ``pickle.load`` in ``CloudpickleWrapper``. (@shwang)
|
||||
- Fixed a bug with orthogonal initialization when ``bias=False`` in custom policy (@rk37)
|
||||
- Fixed approximate entropy calculation in PPO and A2C. (@andyshih12)
|
||||
|
||||
Deprecations:
|
||||
^^^^^^^^^^^^^
|
||||
|
|
@ -355,4 +356,4 @@ And all the contributors:
|
|||
@Miffyli @dwiel @miguelrass @qxcv @jaberkow @eavelardev @ruifeng96150 @pedrohbtp @srivatsankrishnan @evilsocket
|
||||
@MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic @justinkterry @edbeeching
|
||||
@flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
|
||||
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37
|
||||
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ class A2C(OnPolicyAlgorithm):
|
|||
# Entropy loss favors exploration
|
||||
if entropy is None:
|
||||
# Approximate entropy when no analytical form
|
||||
entropy_loss = -log_prob.mean()
|
||||
entropy_loss = -th.mean(-log_prob)
|
||||
else:
|
||||
entropy_loss = -th.mean(entropy)
|
||||
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ class PPO(OnPolicyAlgorithm):
|
|||
# Entropy loss favors exploration
|
||||
if entropy is None:
|
||||
# Approximate entropy when no analytical form
|
||||
entropy_loss = -log_prob.mean()
|
||||
entropy_loss = -th.mean(-log_prob)
|
||||
else:
|
||||
entropy_loss = -th.mean(entropy)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue