diff --git a/docs/guide/callbacks.rst b/docs/guide/callbacks.rst index 7e22bbf..07b78d7 100644 --- a/docs/guide/callbacks.rst +++ b/docs/guide/callbacks.rst @@ -27,7 +27,7 @@ You can find two examples of custom callbacks in the documentation: one for savi """ A custom callback that derives from ``BaseCallback``. - :param verbose: (int) Verbosity level 0: not output 1: info 2: debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ def __init__(self, verbose=0): super(CustomCallback, self).__init__(verbose) @@ -121,7 +121,7 @@ A child callback is for instance :ref:`StopTrainingOnRewardThreshold 0: # Mean training reward over the last 100 episodes mean_reward = np.mean(y[-100:]) - if self.verbose > 0: + if self.verbose >= 1: print(f"Num timesteps: {self.num_timesteps}") print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}") @@ -278,7 +278,7 @@ If your callback returns False, training is aborted early. 
if mean_reward > self.best_mean_reward: self.best_mean_reward = mean_reward # Example for saving best model - if self.verbose > 0: + if self.verbose >= 1: print(f"Saving new best model to {self.save_path}") self.model.save(self.save_path) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 714e910..f0f9d5a 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -46,6 +46,8 @@ Documentation: - Added info on split tensorboard logs into (@Melanol) - Fixed typo in ppo doc (@francescoluciano) - Fixed typo in install doc(@jlp-ue) +- Clarified and standardized verbosity documentation + Release 1.6.0 (2022-07-11) --------------------------- diff --git a/stable_baselines3/a2c/a2c.py b/stable_baselines3/a2c/a2c.py index 13adf68..8058f52 100644 --- a/stable_baselines3/a2c/a2c.py +++ b/stable_baselines3/a2c/a2c.py @@ -44,7 +44,8 @@ class A2C(OnPolicyAlgorithm): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. (Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py index 9445ee4..72428e0 100644 --- a/stable_baselines3/common/base_class.py +++ b/stable_baselines3/common/base_class.py @@ -43,7 +43,7 @@ def maybe_make_env(env: Union[GymEnv, str, None], verbose: int) -> Optional[GymE """If env is a string, make the environment; otherwise, return env. :param env: The environment to learn from. 
- :param verbose: logging verbosity + :param verbose: Verbosity level: 0 for no output, 1 for indicating if environment is created :return A Gym (vector) environment. """ if isinstance(env, str): @@ -64,7 +64,8 @@ class BaseAlgorithm(ABC): it can be a function of the current progress remaining (from 1 to 0) :param policy_kwargs: Additional arguments to be passed to the policy on creation :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param verbose: The verbosity level: 0 none, 1 training information, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param device: Device on which the code should run. By default, it will try to use a Cuda compatible device and fallback to cpu if it is not possible. @@ -108,7 +109,7 @@ class BaseAlgorithm(ABC): self.policy_class = policy self.device = get_device(device) - if verbose > 0: + if verbose >= 1: print(f"Using {self.device} device") self.env = None # type: Optional[GymEnv] @@ -198,7 +199,7 @@ class BaseAlgorithm(ABC): or to re-order the image channels. :param env: - :param verbose: + :param verbose: Verbosity level: 0 for no output, 1 for indicating wrappers used :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible. :return: The wrapped environment. """ diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py index a2abd44..dfbf0ea 100644 --- a/stable_baselines3/common/callbacks.py +++ b/stable_baselines3/common/callbacks.py @@ -15,8 +15,7 @@ class BaseCallback(ABC): """ Base class for callback. - :param verbose: Verbosity of the output (set to 1 for info messages, - 2 for debug) + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ def __init__(self, verbose: int = 0): @@ -124,7 +123,7 @@ class EventCallback(BaseCallback): :param callback: Callback that will be called when an event is triggered. 
- :param verbose: + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0): @@ -231,7 +230,7 @@ class CheckpointCallback(BaseCallback): :param name_prefix: Common prefix to the saved models :param save_replay_buffer: Save the model replay buffer :param save_vecnormalize: Save the ``VecNormalize`` statistics - :param verbose: Verbosity of the output (set to 2 for debug messages) + :param verbose: Verbosity level: 0 for no output, 2 for indicating when saving model checkpoint """ def __init__( @@ -270,7 +269,7 @@ class CheckpointCallback(BaseCallback): if self.n_calls % self.save_freq == 0: model_path = self._checkpoint_path(extension="zip") self.model.save(model_path) - if self.verbose > 1: + if self.verbose >= 2: print(f"Saving model checkpoint to {model_path}") if self.save_replay_buffer and hasattr(self.model, "replay_buffer") and self.model.replay_buffer is not None: @@ -284,7 +283,7 @@ class CheckpointCallback(BaseCallback): # Save the VecNormalize statistics vec_normalize_path = self._checkpoint_path("vecnormalize_", extension="pkl") self.model.get_vec_normalize_env().save(vec_normalize_path) - if self.verbose > 1: + if self.verbose >= 2: print(f"Saving model VecNormalize to {vec_normalize_path}") return True @@ -295,7 +294,7 @@ class ConvertCallback(BaseCallback): Convert functional callback (old-style) to object. :param callback: - :param verbose: + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0): @@ -331,7 +330,7 @@ class EvalCallback(EventCallback): :param deterministic: Whether the evaluation should use a stochastic or deterministic actions. 
:param render: Whether to render or not the environment during evaluation - :param verbose: + :param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been wrapped with a Monitor wrapper) """ @@ -467,7 +466,7 @@ class EvalCallback(EventCallback): mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths) self.last_mean_reward = mean_reward - if self.verbose > 0: + if self.verbose >= 1: print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}") print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}") # Add to current Logger @@ -476,7 +475,7 @@ class EvalCallback(EventCallback): if len(self._is_success_buffer) > 0: success_rate = np.mean(self._is_success_buffer) - if self.verbose > 0: + if self.verbose >= 1: print(f"Success rate: {100 * success_rate:.2f}%") self.logger.record("eval/success_rate", success_rate) @@ -485,7 +484,7 @@ class EvalCallback(EventCallback): self.logger.dump(self.num_timesteps) if mean_reward > self.best_mean_reward: - if self.verbose > 0: + if self.verbose >= 1: print("New best mean reward!") if self.best_model_save_path is not None: self.model.save(os.path.join(self.best_model_save_path, "best_model")) @@ -519,7 +518,8 @@ class StopTrainingOnRewardThreshold(BaseCallback): :param reward_threshold: Minimum expected reward per episode to stop training. 
- :param verbose: + :param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because episodic reward + threshold reached """ def __init__(self, reward_threshold: float, verbose: int = 0): @@ -530,7 +530,7 @@ class StopTrainingOnRewardThreshold(BaseCallback): assert self.parent is not None, "``StopTrainingOnMinimumReward`` callback must be used " "with an ``EvalCallback``" # Convert np.bool_ to bool, otherwise callback() is False won't work continue_training = bool(self.parent.best_mean_reward < self.reward_threshold) - if self.verbose > 0 and not continue_training: + if self.verbose >= 1 and not continue_training: print( f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} " f" is above the threshold {self.reward_threshold}" @@ -567,7 +567,8 @@ class StopTrainingOnMaxEpisodes(BaseCallback): and in total for ``max_episodes * n_envs`` episodes. :param max_episodes: Maximum number of episodes to stop training. - :param verbose: Select whether to print information about when training ended by reaching ``max_episodes`` + :param verbose: Verbosity level: 0 for no output, 1 for indicating information about when training ended by + reaching ``max_episodes`` """ def __init__(self, max_episodes: int, verbose: int = 0): @@ -587,7 +588,7 @@ class StopTrainingOnMaxEpisodes(BaseCallback): continue_training = self.n_episodes < self._total_max_episodes - if self.verbose > 0 and not continue_training: + if self.verbose >= 1 and not continue_training: mean_episodes_per_env = self.n_episodes / self.training_env.num_envs mean_ep_str = ( f"with an average of {mean_episodes_per_env:.2f} episodes per env" if self.training_env.num_envs > 1 else "" @@ -612,7 +613,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback): :param max_no_improvement_evals: Maximum number of consecutive evaluations without a new best model. :param min_evals: Number of evaluations before start to count evaluations without improvements. 
- :param verbose: Verbosity of the output (set to 1 for info messages) + :param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because there was no new best model """ def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0): @@ -637,7 +638,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback): self.last_best_mean_reward = self.parent.best_mean_reward - if self.verbose > 0 and not continue_training: + if self.verbose >= 1 and not continue_training: print( f"Stopping training because there was no new best model in the last {self.no_improvement_evals:d} evaluations" ) diff --git a/stable_baselines3/common/off_policy_algorithm.py b/stable_baselines3/common/off_policy_algorithm.py index ebac818..d2574ed 100644 --- a/stable_baselines3/common/off_policy_algorithm.py +++ b/stable_baselines3/common/off_policy_algorithm.py @@ -51,7 +51,8 @@ class OffPolicyAlgorithm(BaseAlgorithm): See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195 :param policy_kwargs: Additional arguments to be passed to the policy on creation :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param verbose: The verbosity level: 0 none, 1 training information, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param device: Device on which the code should run. By default, it will try to use a Cuda compatible device and fallback to cpu if it is not possible. diff --git a/stable_baselines3/common/on_policy_algorithm.py b/stable_baselines3/common/on_policy_algorithm.py index 84c89d9..a9c0a41 100644 --- a/stable_baselines3/common/on_policy_algorithm.py +++ b/stable_baselines3/common/on_policy_algorithm.py @@ -41,7 +41,8 @@ class OnPolicyAlgorithm(BaseAlgorithm): :param monitor_wrapper: When creating an environment, whether to wrap it or not in a Monitor wrapper. 
:param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/common/save_util.py b/stable_baselines3/common/save_util.py index 1569001..90392df 100644 --- a/stable_baselines3/common/save_util.py +++ b/stable_baselines3/common/save_util.py @@ -186,14 +186,14 @@ def open_path(path: Union[str, pathlib.Path, io.BufferedIOBase], mode: str, verb If the provided path is a string or a pathlib.Path, it ensures that it exists. If the mode is "read" it checks that it exists, if it doesn't exist it attempts to read path.suffix if a suffix is provided. If the mode is "write" and the path does not exist, it creates all the parent folders. If the path - points to a folder, it changes the path to path_2. If the path already exists and verbose == 2, + points to a folder, it changes the path to path_2. If the path already exists and verbose >= 2, it raises a warning. :param path: the path to open. if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the path actually exists. If path is a io.BufferedIOBase the path exists. :param mode: how to open the file. "w"|"write" for writing, "r"|"read" for reading. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix. If mode is "r" then we attempt to open the path. If an error is raised and the suffix is not None, we attempt to open the path with the suffix. 
@@ -223,7 +223,7 @@ def open_path_str(path: str, mode: str, verbose: int = 0, suffix: Optional[str] :param path: the path to open. If mode is "w" then it ensures that the path exists by creating the necessary folders and renaming path if it points to a folder. :param mode: how to open the file. "w" for writing, "r" for reading. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix. If mode is "r" then we attempt to open the path. If an error is raised and the suffix is not None, we attempt to open the path with the suffix. @@ -242,7 +242,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O ensures that the path exists by creating the necessary folders and renaming path if it points to a folder. :param mode: how to open the file. "w" for writing, "r" for reading. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 2 for indicating if path without suffix is not found when mode is "r" :param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix. If mode is "r" then we attempt to open the path. If an error is raised and the suffix is not None, we attempt to open the path with the suffix. @@ -257,7 +257,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O except FileNotFoundError as error: if suffix is not None and suffix != "": newpath = pathlib.Path(f"{path}.{suffix}") - if verbose == 2: + if verbose >= 2: warnings.warn(f"Path '{path}' not found. 
Attempting {newpath}.") path, suffix = newpath, None else: @@ -266,7 +266,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O try: if path.suffix == "" and suffix is not None and suffix != "": path = pathlib.Path(f"{path}.{suffix}") - if path.exists() and path.is_file() and verbose == 2: + if path.exists() and path.is_file() and verbose >= 2: warnings.warn(f"Path '{path}' exists, will overwrite it.") path = path.open("wb") except IsADirectoryError: @@ -300,7 +300,7 @@ def save_to_zip_file( :param params: Model parameters being stored expected to contain an entry for every state_dict with its name and the state_dict. :param pytorch_variables: Other PyTorch variables expected to contain name and value of the variable. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ save_path = open_path(save_path, "w", verbose=0, suffix="zip") # data/params can be None, so do not @@ -336,7 +336,7 @@ def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, ver if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the path actually exists. If path is a io.BufferedIOBase the path exists. :param obj: The object to save. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ with open_path(path, "w", verbose=verbose, suffix="pkl") as file_handler: # Use protocol>=4 to support saving replay buffers >= 4Gb @@ -352,7 +352,7 @@ def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: in :param path: the path to open. if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the path actually exists. If path is a io.BufferedIOBase the path exists. 
- :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages """ with open_path(path, "r", verbose=verbose, suffix="pkl") as file_handler: return pickle.load(file_handler) @@ -379,7 +379,7 @@ def load_from_zip_file( ``keras.models.load_model``. Useful when you have an object in file that can not be deserialized. :param device: Device on which the code should run. - :param verbose: Verbosity level, 0 means only warnings, 2 means debug information. + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages :param print_system_info: Whether to print or not the system info about the saved model. :return: Class parameters, model state_dicts (aka "params", dict of state_dict) diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py index a2126a2..53c642c 100644 --- a/stable_baselines3/common/utils.py +++ b/stable_baselines3/common/utils.py @@ -182,7 +182,7 @@ def configure_logger( """ Configure the logger's outputs. - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for the standard output to be part of the logger outputs :param tensorboard_log: the log location for tensorboard (if None, no logging) :param tb_log_name: tensorboard log :param reset_num_timesteps: Whether the ``num_timesteps`` attribute is reset or not. diff --git a/stable_baselines3/ddpg/ddpg.py b/stable_baselines3/ddpg/ddpg.py index d208a00..a9244e7 100644 --- a/stable_baselines3/ddpg/ddpg.py +++ b/stable_baselines3/ddpg/ddpg.py @@ -46,7 +46,8 @@ class DDPG(TD3): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. 
(Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/dqn/dqn.py b/stable_baselines3/dqn/dqn.py index 4830e7c..80e024b 100644 --- a/stable_baselines3/dqn/dqn.py +++ b/stable_baselines3/dqn/dqn.py @@ -53,7 +53,8 @@ class DQN(OffPolicyAlgorithm): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. (Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/ppo/ppo.py b/stable_baselines3/ppo/ppo.py index 0f7f8e4..6bb9c23 100644 --- a/stable_baselines3/ppo/ppo.py +++ b/stable_baselines3/ppo/ppo.py @@ -58,7 +58,8 @@ class PPO(OnPolicyAlgorithm): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. 
(Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/sac/sac.py b/stable_baselines3/sac/sac.py index f967b38..de08b75 100644 --- a/stable_baselines3/sac/sac.py +++ b/stable_baselines3/sac/sac.py @@ -66,7 +66,8 @@ class SAC(OffPolicyAlgorithm): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. (Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. diff --git a/stable_baselines3/td3/td3.py b/stable_baselines3/td3/td3.py index c8376f5..51df755 100644 --- a/stable_baselines3/td3/td3.py +++ b/stable_baselines3/td3/td3.py @@ -54,7 +54,8 @@ class TD3(OffPolicyAlgorithm): :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. 
(Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation - :param verbose: the verbosity level: 0 no output, 1 info, 2 debug + :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for + debug messages :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible.