Clarify and standardize verbosity documentation (#1056)

* Standardize the use of verbosity: > to >= * Make verbose docstring more specific * Update changelog
2026-06-30 03:38:13 +00:00 · 2022-09-09 16:46:28 +02:00 · 2022-09-09 16:46:28 +02:00 · 98e786f744
commit 98e786f744
parent 29f6687b98
15 changed files with 57 additions and 45 deletions
--- a/docs/guide/callbacks.rst
+++ b/docs/guide/callbacks.rst
@ -27,7 +27,7 @@ You can find two examples of custom callbacks in the documentation: one for savi
        """
        A custom callback that derives from ``BaseCallback``.

-        :param verbose: (int) Verbosity level 0: not output 1: info 2: debug
+        :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
        """
        def __init__(self, verbose=0):
            super(CustomCallback, self).__init__(verbose)
@ -121,7 +121,7 @@ A child callback is for instance :ref:`StopTrainingOnRewardThreshold <StopTraini

        :param callback: (Optional[BaseCallback]) Callback that will be called
            when an event is triggered.
-        :param verbose: (int)
+        :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
        """
        def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
            super(EventCallback, self).__init__(verbose=verbose)
--- a/docs/guide/examples.rst
+++ b/docs/guide/examples.rst
@ -248,7 +248,7 @@ If your callback returns False, training is aborted early.
      :param check_freq:
      :param log_dir: Path to the folder where the model will be saved.
        It must contains the file created by the ``Monitor`` wrapper.
-      :param verbose: Verbosity level.
+      :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
      """
      def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
          super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
@ -270,7 +270,7 @@ If your callback returns False, training is aborted early.
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
-                if self.verbose > 0:
+                if self.verbose >= 1:
                  print(f"Num timesteps: {self.num_timesteps}")
                  print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

@ -278,7 +278,7 @@ If your callback returns False, training is aborted early.
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
-                    if self.verbose > 0:
+                    if self.verbose >= 1:
                      print(f"Saving new best model to {self.save_path}")
                    self.model.save(self.save_path)

--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@ -46,6 +46,8 @@ Documentation:
 - Added info on split tensorboard logs into (@Melanol)
 - Fixed typo in ppo doc (@francescoluciano)
 - Fixed typo in install doc(@jlp-ue)
+- Clarified and standardized verbosity documentation
+

 Release 1.6.0 (2022-07-11)
 ---------------------------
--- a/stable_baselines3/a2c/a2c.py
+++ b/stable_baselines3/a2c/a2c.py
@ -44,7 +44,8 @@ class A2C(OnPolicyAlgorithm):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/common/base_class.py
+++ b/stable_baselines3/common/base_class.py
@ -43,7 +43,7 @@ def maybe_make_env(env: Union[GymEnv, str, None], verbose: int) -> Optional[GymE
    """If env is a string, make the environment; otherwise, return env.

    :param env: The environment to learn from.
-    :param verbose: logging verbosity
+    :param verbose: Verbosity level: 0 for no output, 1 for indicating if environment is created
    :return A Gym (vector) environment.
    """
    if isinstance(env, str):
@ -64,7 +64,8 @@ class BaseAlgorithm(ABC):
        it can be a function of the current progress remaining (from 1 to 0)
    :param policy_kwargs: Additional arguments to be passed to the policy on creation
    :param tensorboard_log: the log location for tensorboard (if None, no logging)
-    :param verbose: The verbosity level: 0 none, 1 training information, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param device: Device on which the code should run.
        By default, it will try to use a Cuda compatible device and fallback to cpu
        if it is not possible.
@ -108,7 +109,7 @@ class BaseAlgorithm(ABC):
            self.policy_class = policy

        self.device = get_device(device)
-        if verbose > 0:
+        if verbose >= 1:
            print(f"Using {self.device} device")

        self.env = None  # type: Optional[GymEnv]
@ -198,7 +199,7 @@ class BaseAlgorithm(ABC):
        or to re-order the image channels.

        :param env:
-        :param verbose:
+        :param verbose: Verbosity level: 0 for no output, 1 for indicating wrappers used
        :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
        :return: The wrapped environment.
        """
--- a/stable_baselines3/common/callbacks.py
+++ b/stable_baselines3/common/callbacks.py
@ -15,8 +15,7 @@ class BaseCallback(ABC):
    """
    Base class for callback.

-    :param verbose: Verbosity of the output (set to 1 for info messages,
-        2 for debug)
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """

    def __init__(self, verbose: int = 0):
@ -124,7 +123,7 @@ class EventCallback(BaseCallback):

    :param callback: Callback that will be called
        when an event is triggered.
-    :param verbose:
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """

    def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
@ -231,7 +230,7 @@ class CheckpointCallback(BaseCallback):
    :param name_prefix: Common prefix to the saved models
    :param save_replay_buffer: Save the model replay buffer
    :param save_vecnormalize: Save the ``VecNormalize`` statistics
-    :param verbose: Verbosity of the output (set to 2 for debug messages)
+    :param verbose: Verbosity level: 0 for no output, 2 for indicating when saving model checkpoint
    """

    def __init__(
@ -270,7 +269,7 @@ class CheckpointCallback(BaseCallback):
        if self.n_calls % self.save_freq == 0:
            model_path = self._checkpoint_path(extension="zip")
            self.model.save(model_path)
-            if self.verbose > 1:
+            if self.verbose >= 2:
                print(f"Saving model checkpoint to {model_path}")

            if self.save_replay_buffer and hasattr(self.model, "replay_buffer") and self.model.replay_buffer is not None:
@ -284,7 +283,7 @@ class CheckpointCallback(BaseCallback):
                # Save the VecNormalize statistics
                vec_normalize_path = self._checkpoint_path("vecnormalize_", extension="pkl")
                self.model.get_vec_normalize_env().save(vec_normalize_path)
-                if self.verbose > 1:
+                if self.verbose >= 2:
                    print(f"Saving model VecNormalize to {vec_normalize_path}")

        return True
@ -295,7 +294,7 @@ class ConvertCallback(BaseCallback):
    Convert functional callback (old-style) to object.

    :param callback:
-    :param verbose:
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """

    def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
@ -331,7 +330,7 @@ class EvalCallback(EventCallback):
    :param deterministic: Whether the evaluation should
        use a stochastic or deterministic actions.
    :param render: Whether to render or not the environment during evaluation
-    :param verbose:
+    :param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results
    :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
        wrapped with a Monitor wrapper)
    """
@ -467,7 +466,7 @@ class EvalCallback(EventCallback):
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = mean_reward

-            if self.verbose > 0:
+            if self.verbose >= 1:
                print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
@ -476,7 +475,7 @@ class EvalCallback(EventCallback):

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
-                if self.verbose > 0:
+                if self.verbose >= 1:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

@ -485,7 +484,7 @@ class EvalCallback(EventCallback):
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
-                if self.verbose > 0:
+                if self.verbose >= 1:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
@ -519,7 +518,8 @@ class StopTrainingOnRewardThreshold(BaseCallback):

    :param reward_threshold:  Minimum expected reward per episode
        to stop training.
-    :param verbose:
+    :param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because episodic reward
+        threshold reached
    """

    def __init__(self, reward_threshold: float, verbose: int = 0):
@ -530,7 +530,7 @@ class StopTrainingOnRewardThreshold(BaseCallback):
        assert self.parent is not None, "``StopTrainingOnMinimumReward`` callback must be used " "with an ``EvalCallback``"
        # Convert np.bool_ to bool, otherwise callback() is False won't work
        continue_training = bool(self.parent.best_mean_reward < self.reward_threshold)
-        if self.verbose > 0 and not continue_training:
+        if self.verbose >= 1 and not continue_training:
            print(
                f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} "
                f" is above the threshold {self.reward_threshold}"
@ -567,7 +567,8 @@ class StopTrainingOnMaxEpisodes(BaseCallback):
    and in total for ``max_episodes * n_envs`` episodes.

    :param max_episodes: Maximum number of episodes to stop training.
-    :param verbose: Select whether to print information about when training ended by reaching ``max_episodes``
+    :param verbose: Verbosity level: 0 for no output, 1 for indicating information about when training ended by
+        reaching ``max_episodes``
    """

    def __init__(self, max_episodes: int, verbose: int = 0):
@ -587,7 +588,7 @@ class StopTrainingOnMaxEpisodes(BaseCallback):

        continue_training = self.n_episodes < self._total_max_episodes

-        if self.verbose > 0 and not continue_training:
+        if self.verbose >= 1 and not continue_training:
            mean_episodes_per_env = self.n_episodes / self.training_env.num_envs
            mean_ep_str = (
                f"with an average of {mean_episodes_per_env:.2f} episodes per env" if self.training_env.num_envs > 1 else ""
@ -612,7 +613,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):

    :param max_no_improvement_evals: Maximum number of consecutive evaluations without a new best model.
    :param min_evals: Number of evaluations before start to count evaluations without improvements.
-    :param verbose: Verbosity of the output (set to 1 for info messages)
+    :param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because there was no new best model
    """

    def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
@ -637,7 +638,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):

        self.last_best_mean_reward = self.parent.best_mean_reward

-        if self.verbose > 0 and not continue_training:
+        if self.verbose >= 1 and not continue_training:
            print(
                f"Stopping training because there was no new best model in the last {self.no_improvement_evals:d} evaluations"
            )
--- a/stable_baselines3/common/off_policy_algorithm.py
+++ b/stable_baselines3/common/off_policy_algorithm.py
@ -51,7 +51,8 @@ class OffPolicyAlgorithm(BaseAlgorithm):
        See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
    :param policy_kwargs: Additional arguments to be passed to the policy on creation
    :param tensorboard_log: the log location for tensorboard (if None, no logging)
-    :param verbose: The verbosity level: 0 none, 1 training information, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param device: Device on which the code should run.
        By default, it will try to use a Cuda compatible device and fallback to cpu
        if it is not possible.
--- a/stable_baselines3/common/on_policy_algorithm.py
+++ b/stable_baselines3/common/on_policy_algorithm.py
@ -41,7 +41,8 @@ class OnPolicyAlgorithm(BaseAlgorithm):
    :param monitor_wrapper: When creating an environment, whether to wrap it
        or not in a Monitor wrapper.
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/common/save_util.py
+++ b/stable_baselines3/common/save_util.py
@ -186,14 +186,14 @@ def open_path(path: Union[str, pathlib.Path, io.BufferedIOBase], mode: str, verb
    If the provided path is a string or a pathlib.Path, it ensures that it exists. If the mode is "read"
    it checks that it exists, if it doesn't exist it attempts to read path.suffix if a suffix is provided.
    If the mode is "write" and the path does not exist, it creates all the parent folders. If the path
-    points to a folder, it changes the path to path_2. If the path already exists and verbose == 2,
+    points to a folder, it changes the path to path_2. If the path already exists and verbose >= 2,
    it raises a warning.

    :param path: the path to open.
        if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
        path actually exists. If path is a io.BufferedIOBase the path exists.
    :param mode: how to open the file. "w"|"write" for writing, "r"|"read" for reading.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
        If mode is "r" then we attempt to open the path. If an error is raised and the suffix
        is not None, we attempt to open the path with the suffix.
@ -223,7 +223,7 @@ def open_path_str(path: str, mode: str, verbose: int = 0, suffix: Optional[str]
    :param path: the path to open. If mode is "w" then it ensures that the path exists
        by creating the necessary folders and renaming path if it points to a folder.
    :param mode: how to open the file. "w" for writing, "r" for reading.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
        If mode is "r" then we attempt to open the path. If an error is raised and the suffix
        is not None, we attempt to open the path with the suffix.
@ -242,7 +242,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
        ensures that the path exists by creating the necessary folders and
        renaming path if it points to a folder.
    :param mode: how to open the file. "w" for writing, "r" for reading.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 2 for indicating if path without suffix is not found when mode is "r"
    :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
        If mode is "r" then we attempt to open the path. If an error is raised and the suffix
        is not None, we attempt to open the path with the suffix.
@ -257,7 +257,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
        except FileNotFoundError as error:
            if suffix is not None and suffix != "":
                newpath = pathlib.Path(f"{path}.{suffix}")
-                if verbose == 2:
+                if verbose >= 2:
                    warnings.warn(f"Path '{path}' not found. Attempting {newpath}.")
                path, suffix = newpath, None
            else:
@ -266,7 +266,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
        try:
            if path.suffix == "" and suffix is not None and suffix != "":
                path = pathlib.Path(f"{path}.{suffix}")
-            if path.exists() and path.is_file() and verbose == 2:
+            if path.exists() and path.is_file() and verbose >= 2:
                warnings.warn(f"Path '{path}' exists, will overwrite it.")
            path = path.open("wb")
        except IsADirectoryError:
@ -300,7 +300,7 @@ def save_to_zip_file(
    :param params: Model parameters being stored expected to contain an entry for every
                   state_dict with its name and the state_dict.
    :param pytorch_variables: Other PyTorch variables expected to contain name and value of the variable.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    save_path = open_path(save_path, "w", verbose=0, suffix="zip")
    # data/params can be None, so do not
@ -336,7 +336,7 @@ def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, ver
        if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
        path actually exists. If path is a io.BufferedIOBase the path exists.
    :param obj: The object to save.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    with open_path(path, "w", verbose=verbose, suffix="pkl") as file_handler:
        # Use protocol>=4 to support saving replay buffers >= 4Gb
@ -352,7 +352,7 @@ def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: in
    :param path: the path to open.
        if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
        path actually exists. If path is a io.BufferedIOBase the path exists.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    with open_path(path, "r", verbose=verbose, suffix="pkl") as file_handler:
        return pickle.load(file_handler)
@ -379,7 +379,7 @@ def load_from_zip_file(
        ``keras.models.load_model``. Useful when you have an object in
        file that can not be deserialized.
    :param device: Device on which the code should run.
-    :param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    :param print_system_info: Whether to print or not the system info
        about the saved model.
    :return: Class parameters, model state_dicts (aka "params", dict of state_dict)
--- a/stable_baselines3/common/utils.py
+++ b/stable_baselines3/common/utils.py
@ -182,7 +182,7 @@ def configure_logger(
    """
    Configure the logger's outputs.

-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for the standard output to be part of the logger outputs
    :param tensorboard_log: the log location for tensorboard (if None, no logging)
    :param tb_log_name: tensorboard log
    :param reset_num_timesteps:  Whether the ``num_timesteps`` attribute is reset or not.
--- a/stable_baselines3/ddpg/ddpg.py
+++ b/stable_baselines3/ddpg/ddpg.py
@ -46,7 +46,8 @@ class DDPG(TD3):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/dqn/dqn.py
+++ b/stable_baselines3/dqn/dqn.py
@ -53,7 +53,8 @@ class DQN(OffPolicyAlgorithm):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/ppo/ppo.py
+++ b/stable_baselines3/ppo/ppo.py
@ -58,7 +58,8 @@ class PPO(OnPolicyAlgorithm):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/sac/sac.py
+++ b/stable_baselines3/sac/sac.py
@ -66,7 +66,8 @@ class SAC(OffPolicyAlgorithm):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
--- a/stable_baselines3/td3/td3.py
+++ b/stable_baselines3/td3/td3.py
@ -54,7 +54,8 @@ class TD3(OffPolicyAlgorithm):
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param policy_kwargs: additional arguments to be passed to the policy on creation
-    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
+    :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
+        debug messages
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.