From f375cc393938a6b4e4dc0fb1de82b4afca37c1bd Mon Sep 17 00:00:00 2001
From: Rushit Shah <29002479+rushitnshah@users.noreply.github.com>
Date: Mon, 4 Mar 2024 04:42:16 -0600
Subject: [PATCH] Fix docstring for ``log_interval`` to differentiate between
 on-policy/off-policy logging frequency (#1855)

* Fix docstring for log_interval inside the learn method in the base class.

* Updated changelog.

* Update docstring

---------

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
---
 docs/misc/changelog.rst                | 3 ++-
 stable_baselines3/common/base_class.py | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 5176c69..e67e5ed 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -66,6 +66,7 @@ Documentation:
 - Added video link to "Practical Tips for Reliable Reinforcement Learning" video
 - Added ``render_mode="human"`` in the README example (@marekm4)
 - Fixed docstring signature for sum_independent_dims (@stagoverflow)
+- Updated docstring description for ``log_interval`` in the base class (@rushitnshah)
 
 Release 2.2.1 (2023-11-17)
 --------------------------
@@ -1566,4 +1567,4 @@ And all the contributors:
 @anand-bala @hughperkins @sidney-tio @AlexPasqua @dominicgkerr @Akhilez @Rocamonde @tobirohrer @ZikangXiong @ReHoss
 @DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
 @lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
-@marekm4 @stagoverflow
+@marekm4 @stagoverflow @rushitnshah
diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py
index 5e87599..e6c7d3c 100644
--- a/stable_baselines3/common/base_class.py
+++ b/stable_baselines3/common/base_class.py
@@ -523,7 +523,10 @@ class BaseAlgorithm(ABC):
 
         :param total_timesteps: The total number of samples (env steps) to train on
         :param callback: callback(s) called at every step with state of the algorithm.
-        :param log_interval: The number of episodes before logging.
+        :param log_interval: for on-policy algos (e.g., PPO, A2C, ...) this is the number of
+            training iterations between two logs (i.e., one log every log_interval * n_steps * n_envs
+            timesteps); for off-policy algos (e.g., TD3, SAC, ...) this is the number of episodes
+            between two logs.
         :param tb_log_name: the name of the run for TensorBoard logging
         :param reset_num_timesteps: whether or not to reset the current timestep number (used in logging)
         :param progress_bar: Display a progress bar using tqdm and rich.