From abbf48e93ee956d2ed023588d8325bd4187d26d4 Mon Sep 17 00:00:00 2001 From: Skander Moalla <37197319+skandermoalla@users.noreply.github.com> Date: Thu, 1 Jul 2021 14:43:08 +0100 Subject: [PATCH] Fix Inconsistencies with EvalCallback tensorboard logs (#492) * Make EvalCallback dump the evaluation logs it records #457. * Make test deterministic Co-authored-by: Antonin Raffin --- docs/misc/changelog.rst | 3 ++- stable_baselines3/common/callbacks.py | 4 ++++ tests/test_callbacks.py | 26 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index c1e4193..d111926 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -60,6 +60,7 @@ Bug Fixes: - Fixed saving of ``A2C`` and ``PPO`` policy when using gSDE (thanks @liusida) - Fixed a bug where no output would be shown even if ``verbose>=1`` after passing ``verbose=0`` once - Fixed observation buffers dtype in DictReplayBuffer (@c-rizz) +- Fixed EvalCallback tensorboard logs being logged with the incorrect timestep. They are now written with the timestep at which they were recorded. (@skandermoalla) Deprecations: ^^^^^^^^^^^^^ @@ -707,4 +708,4 @@ And all the contributors: @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray @tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn @ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615 -@c-rizz +@c-rizz @skandermoalla diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py index 4191a16..065bc70 100644 --- a/stable_baselines3/common/callbacks.py +++ b/stable_baselines3/common/callbacks.py @@ -414,6 +414,10 @@ class EvalCallback(EventCallback): print(f"Success rate: {100 * success_rate:.2f}%") self.logger.record("eval/success_rate", success_rate) + # Dump log so the evaluation results are printed with the correct timestep + self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard") + self.logger.dump(self.num_timesteps) + if mean_reward > self.best_mean_reward: if self.verbose > 0: print("New best mean reward!") diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py index 7d43672..c94bb8f 100644 --- a/tests/test_callbacks.py +++ b/tests/test_callbacks.py @@ -141,3 +141,29 @@ def test_eval_success_logging(tmp_path): assert len(eval_callback._is_success_buffer) > 0 # More than 50% success rate assert np.mean(eval_callback._is_success_buffer) > 0.5 + + +def test_eval_callback_logs_are_written_with_the_correct_timestep(tmp_path): + # Skip if no tensorboard installed + pytest.importorskip("tensorboard") + from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + + env_name = select_env(DQN) + model = DQN( + "MlpPolicy", + env_name, + policy_kwargs=dict(net_arch=[32]), + tensorboard_log=tmp_path, + verbose=1, + seed=1, + ) + + eval_env = gym.make(env_name) + eval_freq = 101 + eval_callback = EvalCallback(eval_env, eval_freq=eval_freq, warn=False) + model.learn(500, callback=eval_callback) + + acc = EventAccumulator(str(tmp_path / "DQN_1")) + acc.Reload() + for event in acc.scalars.Items("eval/mean_reward"): + assert event.step % eval_freq == 0