diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 5176c69..e67e5ed 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -66,6 +66,7 @@ Documentation: - Added video link to "Practical Tips for Reliable Reinforcement Learning" video - Added ``render_mode="human"`` in the README example (@marekm4) - Fixed docstring signature for sum_independent_dims (@stagoverflow) +- Updated docstring description for ``log_interval`` in the base class (@rushitnshah) Release 2.2.1 (2023-11-17) -------------------------- @@ -1566,4 +1567,4 @@ And all the contributors: @anand-bala @hughperkins @sidney-tio @AlexPasqua @dominicgkerr @Akhilez @Rocamonde @tobirohrer @ZikangXiong @ReHoss @DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto @lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger -@marekm4 @stagoverflow +@marekm4 @stagoverflow @rushitnshah diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py index 5e87599..e6c7d3c 100644 --- a/stable_baselines3/common/base_class.py +++ b/stable_baselines3/common/base_class.py @@ -523,7 +523,10 @@ class BaseAlgorithm(ABC): :param total_timesteps: The total number of samples (env steps) to train on :param callback: callback(s) called at every step with state of the algorithm. - :param log_interval: The number of episodes before logging. + :param log_interval: for on-policy algorithms (e.g., PPO, A2C, ...) this is the number of + training iterations (i.e., log_interval * n_steps * n_envs timesteps) before logging; + for off-policy algorithms (e.g., TD3, SAC, ...) this is the number of episodes before + logging. 
:param tb_log_name: the name of the run for TensorBoard logging :param reset_num_timesteps: whether or not to reset the current timestep number (used in logging) :param progress_bar: Display a progress bar using tqdm and rich.