Use consistent logging keys (#605)

* Use a consistent key to log the total timesteps

  This changes the timestep logging key of on-policy algorithms from
  `time/total_timesteps` to `time/total timesteps` (note the
  underscore/space). The off-policy algorithms and the eval callback
  already use the latter, so this behavior is more consistent.

* Use underscores instead of spaces in logging keys

  Most keys already followed this policy and consistent behavior is
  friendlier to new users.

* Minor edit and bump version

Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
2026-07-18 18:52:30 +00:00 · 2021-10-12 13:17:30 +02:00 · 2021-10-12 13:17:30 +02:00 · 09e9fc42eb
commit 09e9fc42eb
parent 75aa31dcfb
5 changed files with 12 additions and 7 deletions
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -4,7 +4,7 @@ Changelog
 ==========


-Release 1.2.1a2 (WIP)
+Release 1.2.1a3 (WIP)
 ---------------------------


@@ -12,6 +12,11 @@ Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - ``sde_net_arch`` argument in policies is deprecated and will be removed in a future version.
 - ``_get_latent`` (``ActorCriticPolicy``) was removed
+- All logging keys now use underscores instead of spaces (@timokau). Concretely this changes:
+
+    - ``time/total timesteps`` to ``time/total_timesteps`` for off-policy algorithms and the eval callback (on-policy algorithms such as PPO and A2C already used the underscored version),
+    - ``rollout/exploration rate`` to ``rollout/exploration_rate`` and
+    - ``rollout/success rate`` to ``rollout/success_rate``.

 New Features:
 ^^^^^^^^^^^^^
@@ -788,4 +793,4 @@ And all the contributors:
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
@diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
@tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
-@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev
+@ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 @c-rizz @skandermoalla @MihaiAnca13 @davidblom603 @ayeright @cyprienc @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau
--- a/stable_baselines3/common/callbacks.py
+++ b/stable_baselines3/common/callbacks.py
@@ -423,7 +423,7 @@ class EvalCallback(EventCallback):
                self.logger.record("eval/success_rate", success_rate)

            # Dump log so the evaluation results are printed with the correct timestep
-            self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard")
+            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
--- a/stable_baselines3/common/off_policy_algorithm.py
+++ b/stable_baselines3/common/off_policy_algorithm.py
@@ -437,12 +437,12 @@ class OffPolicyAlgorithm(BaseAlgorithm):
            self.logger.record("rollout/ep_len_mean", safe_mean([ep_info["l"] for ep_info in self.ep_info_buffer]))
        self.logger.record("time/fps", fps)
        self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
-        self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard")
+        self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
        if self.use_sde:
            self.logger.record("train/std", (self.actor.get_std()).mean().item())

        if len(self.ep_success_buffer) > 0:
-            self.logger.record("rollout/success rate", safe_mean(self.ep_success_buffer))
+            self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
        # Pass the number of timesteps for tensorboard
        self.logger.dump(step=self.num_timesteps)

--- a/stable_baselines3/dqn/dqn.py
+++ b/stable_baselines3/dqn/dqn.py
@@ -149,7 +149,7 @@ class DQN(OffPolicyAlgorithm):
            polyak_update(self.q_net.parameters(), self.q_net_target.parameters(), self.tau)

        self.exploration_rate = self.exploration_schedule(self._current_progress_remaining)
-        self.logger.record("rollout/exploration rate", self.exploration_rate)
+        self.logger.record("rollout/exploration_rate", self.exploration_rate)

    def train(self, gradient_steps: int, batch_size: int = 100) -> None:
        # Switch to train mode (this affects batch norm / dropout)
--- a/stable_baselines3/version.txt
+++ b/stable_baselines3/version.txt
@@ -1 +1 @@
-1.2.1a2
+1.2.1a3
 @ -1 +1 @@
 .2.1a2
 .2.1a3