From abbf48e93ee956d2ed023588d8325bd4187d26d4 Mon Sep 17 00:00:00 2001
From: Skander Moalla <37197319+skandermoalla@users.noreply.github.com>
Date: Thu, 1 Jul 2021 14:43:08 +0100
Subject: [PATCH] Fix Inconsistencies with EvalCallback tensorboard logs (#492)

* Make EvalCallback dump the evaluation logs it records #457.

* Make test deterministic

Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
---
 docs/misc/changelog.rst               |  3 ++-
 stable_baselines3/common/callbacks.py |  4 ++++
 tests/test_callbacks.py               | 26 ++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index c1e4193..d111926 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -60,6 +60,7 @@ Bug Fixes:
 - Fixed saving of ``A2C`` and ``PPO`` policy when using gSDE (thanks @liusida)
 - Fixed a bug where no output would be shown even if ``verbose>=1`` after passing ``verbose=0`` once
 - Fixed observation buffers dtype in DictReplayBuffer (@c-rizz)
+- Fixed EvalCallback tensorboard logs being logged with the incorrect timestep. They are now written with the timestep at which they were recorded. (@skandermoalla)
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -707,4 +708,4 @@ And all the contributors:
 @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
 @tfederico @hn2 @LucasAlegre @AptX395 @zampanteymedio @JadenTravnik @decodyng @ardabbour @lorenz-h @mschweizer @lorepieri8 @vwxyzjn
 @ShangqunYu @PierreExeter @JacopoPan @ltbd78 @tom-doerr @Atlis @liusida @09tangriro @amy12xx @juancroldan @benblack769 @bstee615
-@c-rizz
+@c-rizz @skandermoalla
diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py
index 4191a16..065bc70 100644
--- a/stable_baselines3/common/callbacks.py
+++ b/stable_baselines3/common/callbacks.py
@@ -414,6 +414,10 @@ class EvalCallback(EventCallback):
                     print(f"Success rate: {100 * success_rate:.2f}%")
                 self.logger.record("eval/success_rate", success_rate)
 
+            # Dump log so the evaluation results are printed with the correct timestep
+            self.logger.record("time/total timesteps", self.num_timesteps, exclude="tensorboard")
+            self.logger.dump(self.num_timesteps)
+
             if mean_reward > self.best_mean_reward:
                 if self.verbose > 0:
                     print("New best mean reward!")
diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index 7d43672..c94bb8f 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -141,3 +141,29 @@ def test_eval_success_logging(tmp_path):
     assert len(eval_callback._is_success_buffer) > 0
     # More than 50% success rate
     assert np.mean(eval_callback._is_success_buffer) > 0.5
+
+
+def test_eval_callback_logs_are_written_with_the_correct_timestep(tmp_path):
+    # Skip if no tensorboard installed
+    pytest.importorskip("tensorboard")
+    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
+
+    env_name = select_env(DQN)
+    model = DQN(
+        "MlpPolicy",
+        env_name,
+        policy_kwargs=dict(net_arch=[32]),
+        tensorboard_log=tmp_path,
+        verbose=1,
+        seed=1,
+    )
+
+    eval_env = gym.make(env_name)
+    eval_freq = 101
+    eval_callback = EvalCallback(eval_env, eval_freq=eval_freq, warn=False)
+    model.learn(500, callback=eval_callback)
+
+    acc = EventAccumulator(str(tmp_path / "DQN_1"))
+    acc.Reload()
+    for event in acc.scalars.Items("eval/mean_reward"):
+        assert event.step % eval_freq == 0