mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-06-01 23:30:53 +00:00
Use consistent logging keys (#605)
* Use a consistent key to log the total timesteps This changes the timestep logging key of on-policy algorithms from `time/total_timesteps` to `time/total timesteps` (note the underscore/space). The off-policy algorithms and the eval callback already use the latter, so this behavior is more consistent. * Use underscores instead of spaces in logging keys Most keys already followed this policy and consistent behavior is friendlier to new users. * Minor edit and bump version Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
This commit is contained in:
parent
75aa31dcfb
commit
09e9fc42eb
5 changed files with 12 additions and 7 deletions
|
|
@ -4,7 +4,7 @@ Changelog
|
|||
==========
|
||||
|
||||
|
||||
Release 1.2.1a2 (WIP)
|
||||
Release 1.2.1a3 (WIP)
|
||||
---------------------------
|
||||
|
||||
|
||||
|
|
@ -12,6 +12,11 @@ Breaking Changes:
|
|||
^^^^^^^^^^^^^^^^^
|
||||
- ``sde_net_arch`` argument in policies is deprecated and will be removed in a future version.
|
||||
- ``_get_latent`` (``ActorCriticPolicy``) was removed
|
||||
- All logging keys now use underscores instead of spaces (@timokau). Concretely this changes:
|
||||
|
||||
- ``time/total timesteps`` to ``time/total_timesteps`` for off-policy algorithms and the eval callback (on-policy algorithms such as PPO and A2C already used the underscored version),
|
||||
- ``rollout/exploration rate`` to ``rollout/exploration_rate`` and
|
||||
- ``rollout/success rate`` to ``rollout/success_rate``.
|
||||
|
||||
New Features:
|
||||
^^^^^^^^^^^^^
|
||||
|
|
@ -788,4 +793,4 @@ And all the contributors:
|
|||
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
|
||||
@diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
|
||||
@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
|
||||
@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev
|
||||
@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ class EvalCallback(EventCallback):
|
|||
self.logger.record("eval/success_rate", success_rate)
|
||||
|
||||
# Dump log so the evaluation results are printed with the correct timestep
|
||||
self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard")
|
||||
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
|
||||
self.logger.dump(self.num_timesteps)
|
||||
|
||||
if mean_reward > self.best_mean_reward:
|
||||
|
|
|
|||
|
|
@ -437,12 +437,12 @@ class OffPolicyAlgorithm(BaseAlgorithm):
|
|||
self.logger.record("rollout/ep_len_mean", safe_mean([ep_info["l"] for ep_info in self.ep_info_buffer]))
|
||||
self.logger.record("time/fps", fps)
|
||||
self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
|
||||
self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard")
|
||||
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
|
||||
if self.use_sde:
|
||||
self.logger.record("train/std", (self.actor.get_std()).mean().item())
|
||||
|
||||
if len(self.ep_success_buffer) > 0:
|
||||
self.logger.record("rollout/success rate", safe_mean(self.ep_success_buffer))
|
||||
self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
|
||||
# Pass the number of timesteps for tensorboard
|
||||
self.logger.dump(step=self.num_timesteps)
|
||||
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ class DQN(OffPolicyAlgorithm):
|
|||
polyak_update(self.q_net.parameters(), self.q_net_target.parameters(), self.tau)
|
||||
|
||||
self.exploration_rate = self.exploration_schedule(self._current_progress_remaining)
|
||||
self.logger.record("rollout/exploration rate", self.exploration_rate)
|
||||
self.logger.record("rollout/exploration_rate", self.exploration_rate)
|
||||
|
||||
def train(self, gradient_steps: int, batch_size: int = 100) -> None:
|
||||
# Switch to train mode (this affects batch norm / dropout)
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
1.2.1a2
|
||||
1.2.1a3
|
||||
|
|
|
|||
Loading…
Reference in a new issue