mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-06-30 03:38:13 +00:00
Clarify and standardize verbosity documentation (#1056)
* Standardize the use of verbosity: > to >= * Make verbose docstring more specific * Update changelog
This commit is contained in:
parent
29f6687b98
commit
98e786f744
15 changed files with 57 additions and 45 deletions
|
|
@ -27,7 +27,7 @@ You can find two examples of custom callbacks in the documentation: one for savi
|
|||
"""
|
||||
A custom callback that derives from ``BaseCallback``.
|
||||
|
||||
:param verbose: (int) Verbosity level 0: not output 1: info 2: debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
def __init__(self, verbose=0):
|
||||
super(CustomCallback, self).__init__(verbose)
|
||||
|
|
@ -121,7 +121,7 @@ A child callback is for instance :ref:`StopTrainingOnRewardThreshold <StopTraini
|
|||
|
||||
:param callback: (Optional[BaseCallback]) Callback that will be called
|
||||
when an event is triggered.
|
||||
:param verbose: (int)
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
|
||||
super(EventCallback, self).__init__(verbose=verbose)
|
||||
|
|
|
|||
|
|
@ -248,7 +248,7 @@ If your callback returns False, training is aborted early.
|
|||
:param check_freq:
|
||||
:param log_dir: Path to the folder where the model will be saved.
|
||||
It must contain the file created by the ``Monitor`` wrapper.
|
||||
:param verbose: Verbosity level.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
|
||||
super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
|
||||
|
|
@ -270,7 +270,7 @@ If your callback returns False, training is aborted early.
|
|||
if len(x) > 0:
|
||||
# Mean training reward over the last 100 episodes
|
||||
mean_reward = np.mean(y[-100:])
|
||||
if self.verbose > 0:
|
||||
if self.verbose >= 1:
|
||||
print(f"Num timesteps: {self.num_timesteps}")
|
||||
print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")
|
||||
|
||||
|
|
@ -278,7 +278,7 @@ If your callback returns False, training is aborted early.
|
|||
if mean_reward > self.best_mean_reward:
|
||||
self.best_mean_reward = mean_reward
|
||||
# Example for saving best model
|
||||
if self.verbose > 0:
|
||||
if self.verbose >= 1:
|
||||
print(f"Saving new best model to {self.save_path}")
|
||||
self.model.save(self.save_path)
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@ Documentation:
|
|||
- Added info on split tensorboard logs into (@Melanol)
|
||||
- Fixed typo in ppo doc (@francescoluciano)
|
||||
- Fixed typo in install doc (@jlp-ue)
|
||||
- Clarified and standardized verbosity documentation
|
||||
|
||||
|
||||
Release 1.6.0 (2022-07-11)
|
||||
---------------------------
|
||||
|
|
|
|||
|
|
@ -44,7 +44,8 @@ class A2C(OnPolicyAlgorithm):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ def maybe_make_env(env: Union[GymEnv, str, None], verbose: int) -> Optional[GymE
|
|||
"""If env is a string, make the environment; otherwise, return env.
|
||||
|
||||
:param env: The environment to learn from.
|
||||
:param verbose: logging verbosity
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating if environment is created
|
||||
:return: A Gym (vector) environment.
|
||||
"""
|
||||
if isinstance(env, str):
|
||||
|
|
@ -64,7 +64,8 @@ class BaseAlgorithm(ABC):
|
|||
it can be a function of the current progress remaining (from 1 to 0)
|
||||
:param policy_kwargs: Additional arguments to be passed to the policy on creation
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param verbose: The verbosity level: 0 none, 1 training information, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param device: Device on which the code should run.
|
||||
By default, it will try to use a Cuda compatible device and fallback to cpu
|
||||
if it is not possible.
|
||||
|
|
@ -108,7 +109,7 @@ class BaseAlgorithm(ABC):
|
|||
self.policy_class = policy
|
||||
|
||||
self.device = get_device(device)
|
||||
if verbose > 0:
|
||||
if verbose >= 1:
|
||||
print(f"Using {self.device} device")
|
||||
|
||||
self.env = None # type: Optional[GymEnv]
|
||||
|
|
@ -198,7 +199,7 @@ class BaseAlgorithm(ABC):
|
|||
or to re-order the image channels.
|
||||
|
||||
:param env:
|
||||
:param verbose:
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating wrappers used
|
||||
:param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
|
||||
:return: The wrapped environment.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ class BaseCallback(ABC):
|
|||
"""
|
||||
Base class for callback.
|
||||
|
||||
:param verbose: Verbosity of the output (set to 1 for info messages,
|
||||
2 for debug)
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
|
||||
def __init__(self, verbose: int = 0):
|
||||
|
|
@ -124,7 +123,7 @@ class EventCallback(BaseCallback):
|
|||
|
||||
:param callback: Callback that will be called
|
||||
when an event is triggered.
|
||||
:param verbose:
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
|
||||
def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
|
||||
|
|
@ -231,7 +230,7 @@ class CheckpointCallback(BaseCallback):
|
|||
:param name_prefix: Common prefix to the saved models
|
||||
:param save_replay_buffer: Save the model replay buffer
|
||||
:param save_vecnormalize: Save the ``VecNormalize`` statistics
|
||||
:param verbose: Verbosity of the output (set to 2 for debug messages)
|
||||
:param verbose: Verbosity level: 0 for no output, 2 for indicating when saving model checkpoint
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
|
@ -270,7 +269,7 @@ class CheckpointCallback(BaseCallback):
|
|||
if self.n_calls % self.save_freq == 0:
|
||||
model_path = self._checkpoint_path(extension="zip")
|
||||
self.model.save(model_path)
|
||||
if self.verbose > 1:
|
||||
if self.verbose >= 2:
|
||||
print(f"Saving model checkpoint to {model_path}")
|
||||
|
||||
if self.save_replay_buffer and hasattr(self.model, "replay_buffer") and self.model.replay_buffer is not None:
|
||||
|
|
@ -284,7 +283,7 @@ class CheckpointCallback(BaseCallback):
|
|||
# Save the VecNormalize statistics
|
||||
vec_normalize_path = self._checkpoint_path("vecnormalize_", extension="pkl")
|
||||
self.model.get_vec_normalize_env().save(vec_normalize_path)
|
||||
if self.verbose > 1:
|
||||
if self.verbose >= 2:
|
||||
print(f"Saving model VecNormalize to {vec_normalize_path}")
|
||||
|
||||
return True
|
||||
|
|
@ -295,7 +294,7 @@ class ConvertCallback(BaseCallback):
|
|||
Convert functional callback (old-style) to object.
|
||||
|
||||
:param callback:
|
||||
:param verbose:
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
|
||||
def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
|
||||
|
|
@ -331,7 +330,7 @@ class EvalCallback(EventCallback):
|
|||
:param deterministic: Whether the evaluation should
|
||||
use stochastic or deterministic actions.
|
||||
:param render: Whether to render or not the environment during evaluation
|
||||
:param verbose:
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results
|
||||
:param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
|
||||
wrapped with a Monitor wrapper)
|
||||
"""
|
||||
|
|
@ -467,7 +466,7 @@ class EvalCallback(EventCallback):
|
|||
mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
|
||||
self.last_mean_reward = mean_reward
|
||||
|
||||
if self.verbose > 0:
|
||||
if self.verbose >= 1:
|
||||
print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
|
||||
print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
|
||||
# Add to current Logger
|
||||
|
|
@ -476,7 +475,7 @@ class EvalCallback(EventCallback):
|
|||
|
||||
if len(self._is_success_buffer) > 0:
|
||||
success_rate = np.mean(self._is_success_buffer)
|
||||
if self.verbose > 0:
|
||||
if self.verbose >= 1:
|
||||
print(f"Success rate: {100 * success_rate:.2f}%")
|
||||
self.logger.record("eval/success_rate", success_rate)
|
||||
|
||||
|
|
@ -485,7 +484,7 @@ class EvalCallback(EventCallback):
|
|||
self.logger.dump(self.num_timesteps)
|
||||
|
||||
if mean_reward > self.best_mean_reward:
|
||||
if self.verbose > 0:
|
||||
if self.verbose >= 1:
|
||||
print("New best mean reward!")
|
||||
if self.best_model_save_path is not None:
|
||||
self.model.save(os.path.join(self.best_model_save_path, "best_model"))
|
||||
|
|
@ -519,7 +518,8 @@ class StopTrainingOnRewardThreshold(BaseCallback):
|
|||
|
||||
:param reward_threshold: Minimum expected reward per episode
|
||||
to stop training.
|
||||
:param verbose:
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because episodic reward
|
||||
threshold reached
|
||||
"""
|
||||
|
||||
def __init__(self, reward_threshold: float, verbose: int = 0):
|
||||
|
|
@ -530,7 +530,7 @@ class StopTrainingOnRewardThreshold(BaseCallback):
|
|||
assert self.parent is not None, "``StopTrainingOnMinimumReward`` callback must be used " "with an ``EvalCallback``"
|
||||
# Convert np.bool_ to bool, otherwise callback() is False won't work
|
||||
continue_training = bool(self.parent.best_mean_reward < self.reward_threshold)
|
||||
if self.verbose > 0 and not continue_training:
|
||||
if self.verbose >= 1 and not continue_training:
|
||||
print(
|
||||
f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} "
|
||||
f" is above the threshold {self.reward_threshold}"
|
||||
|
|
@ -567,7 +567,8 @@ class StopTrainingOnMaxEpisodes(BaseCallback):
|
|||
and in total for ``max_episodes * n_envs`` episodes.
|
||||
|
||||
:param max_episodes: Maximum number of episodes to stop training.
|
||||
:param verbose: Select whether to print information about when training ended by reaching ``max_episodes``
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating information about when training ended by
|
||||
reaching ``max_episodes``
|
||||
"""
|
||||
|
||||
def __init__(self, max_episodes: int, verbose: int = 0):
|
||||
|
|
@ -587,7 +588,7 @@ class StopTrainingOnMaxEpisodes(BaseCallback):
|
|||
|
||||
continue_training = self.n_episodes < self._total_max_episodes
|
||||
|
||||
if self.verbose > 0 and not continue_training:
|
||||
if self.verbose >= 1 and not continue_training:
|
||||
mean_episodes_per_env = self.n_episodes / self.training_env.num_envs
|
||||
mean_ep_str = (
|
||||
f"with an average of {mean_episodes_per_env:.2f} episodes per env" if self.training_env.num_envs > 1 else ""
|
||||
|
|
@ -612,7 +613,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):
|
|||
|
||||
:param max_no_improvement_evals: Maximum number of consecutive evaluations without a new best model.
|
||||
:param min_evals: Number of evaluations before starting to count evaluations without improvements.
|
||||
:param verbose: Verbosity of the output (set to 1 for info messages)
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because there was no new best model
|
||||
"""
|
||||
|
||||
def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
|
||||
|
|
@ -637,7 +638,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):
|
|||
|
||||
self.last_best_mean_reward = self.parent.best_mean_reward
|
||||
|
||||
if self.verbose > 0 and not continue_training:
|
||||
if self.verbose >= 1 and not continue_training:
|
||||
print(
|
||||
f"Stopping training because there was no new best model in the last {self.no_improvement_evals:d} evaluations"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -51,7 +51,8 @@ class OffPolicyAlgorithm(BaseAlgorithm):
|
|||
See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
|
||||
:param policy_kwargs: Additional arguments to be passed to the policy on creation
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param verbose: The verbosity level: 0 none, 1 training information, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param device: Device on which the code should run.
|
||||
By default, it will try to use a Cuda compatible device and fallback to cpu
|
||||
if it is not possible.
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ class OnPolicyAlgorithm(BaseAlgorithm):
|
|||
:param monitor_wrapper: When creating an environment, whether to wrap it
|
||||
or not in a Monitor wrapper.
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -186,14 +186,14 @@ def open_path(path: Union[str, pathlib.Path, io.BufferedIOBase], mode: str, verb
|
|||
If the provided path is a string or a pathlib.Path, it ensures that it exists. If the mode is "read"
|
||||
it checks that it exists, if it doesn't exist it attempts to read path.suffix if a suffix is provided.
|
||||
If the mode is "write" and the path does not exist, it creates all the parent folders. If the path
|
||||
points to a folder, it changes the path to path_2. If the path already exists and verbose == 2,
|
||||
points to a folder, it changes the path to path_2. If the path already exists and verbose >= 2,
|
||||
it raises a warning.
|
||||
|
||||
:param path: the path to open.
|
||||
if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
|
||||
path actually exists. If path is a io.BufferedIOBase the path exists.
|
||||
:param mode: how to open the file. "w"|"write" for writing, "r"|"read" for reading.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
:param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
|
||||
If mode is "r" then we attempt to open the path. If an error is raised and the suffix
|
||||
is not None, we attempt to open the path with the suffix.
|
||||
|
|
@ -223,7 +223,7 @@ def open_path_str(path: str, mode: str, verbose: int = 0, suffix: Optional[str]
|
|||
:param path: the path to open. If mode is "w" then it ensures that the path exists
|
||||
by creating the necessary folders and renaming path if it points to a folder.
|
||||
:param mode: how to open the file. "w" for writing, "r" for reading.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
:param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
|
||||
If mode is "r" then we attempt to open the path. If an error is raised and the suffix
|
||||
is not None, we attempt to open the path with the suffix.
|
||||
|
|
@ -242,7 +242,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
|
|||
ensures that the path exists by creating the necessary folders and
|
||||
renaming path if it points to a folder.
|
||||
:param mode: how to open the file. "w" for writing, "r" for reading.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 2 for indicating if path without suffix is not found when mode is "r"
|
||||
:param suffix: The preferred suffix. If mode is "w" then the opened file has the suffix.
|
||||
If mode is "r" then we attempt to open the path. If an error is raised and the suffix
|
||||
is not None, we attempt to open the path with the suffix.
|
||||
|
|
@ -257,7 +257,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
|
|||
except FileNotFoundError as error:
|
||||
if suffix is not None and suffix != "":
|
||||
newpath = pathlib.Path(f"{path}.{suffix}")
|
||||
if verbose == 2:
|
||||
if verbose >= 2:
|
||||
warnings.warn(f"Path '{path}' not found. Attempting {newpath}.")
|
||||
path, suffix = newpath, None
|
||||
else:
|
||||
|
|
@ -266,7 +266,7 @@ def open_path_pathlib(path: pathlib.Path, mode: str, verbose: int = 0, suffix: O
|
|||
try:
|
||||
if path.suffix == "" and suffix is not None and suffix != "":
|
||||
path = pathlib.Path(f"{path}.{suffix}")
|
||||
if path.exists() and path.is_file() and verbose == 2:
|
||||
if path.exists() and path.is_file() and verbose >= 2:
|
||||
warnings.warn(f"Path '{path}' exists, will overwrite it.")
|
||||
path = path.open("wb")
|
||||
except IsADirectoryError:
|
||||
|
|
@ -300,7 +300,7 @@ def save_to_zip_file(
|
|||
:param params: Model parameters being stored expected to contain an entry for every
|
||||
state_dict with its name and the state_dict.
|
||||
:param pytorch_variables: Other PyTorch variables expected to contain name and value of the variable.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
save_path = open_path(save_path, "w", verbose=0, suffix="zip")
|
||||
# data/params can be None, so do not
|
||||
|
|
@ -336,7 +336,7 @@ def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, ver
|
|||
if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
|
||||
path actually exists. If path is a io.BufferedIOBase the path exists.
|
||||
:param obj: The object to save.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
with open_path(path, "w", verbose=verbose, suffix="pkl") as file_handler:
|
||||
# Use protocol>=4 to support saving replay buffers >= 4Gb
|
||||
|
|
@ -352,7 +352,7 @@ def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: in
|
|||
:param path: the path to open.
|
||||
if save_path is a str or pathlib.Path and mode is "w", single dispatch ensures that the
|
||||
path actually exists. If path is a io.BufferedIOBase the path exists.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
"""
|
||||
with open_path(path, "r", verbose=verbose, suffix="pkl") as file_handler:
|
||||
return pickle.load(file_handler)
|
||||
|
|
@ -379,7 +379,7 @@ def load_from_zip_file(
|
|||
``keras.models.load_model``. Useful when you have an object in
|
||||
file that can not be deserialized.
|
||||
:param device: Device on which the code should run.
|
||||
:param verbose: Verbosity level, 0 means only warnings, 2 means debug information.
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
|
||||
:param print_system_info: Whether to print or not the system info
|
||||
about the saved model.
|
||||
:return: Class parameters, model state_dicts (aka "params", dict of state_dict)
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ def configure_logger(
|
|||
"""
|
||||
Configure the logger's outputs.
|
||||
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for the standard output to be part of the logger outputs
|
||||
:param tensorboard_log: the log location for tensorboard (if None, no logging)
|
||||
:param tb_log_name: tensorboard log
|
||||
:param reset_num_timesteps: Whether the ``num_timesteps`` attribute is reset or not.
|
||||
|
|
|
|||
|
|
@ -46,7 +46,8 @@ class DDPG(TD3):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -53,7 +53,8 @@ class DQN(OffPolicyAlgorithm):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -58,7 +58,8 @@ class PPO(OnPolicyAlgorithm):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ class SAC(OffPolicyAlgorithm):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
|
|
@ -54,7 +54,8 @@ class TD3(OffPolicyAlgorithm):
|
|||
:param create_eval_env: Whether to create a second environment that will be
|
||||
used for evaluating the agent periodically. (Only available when passing string for the environment)
|
||||
:param policy_kwargs: additional arguments to be passed to the policy on creation
|
||||
:param verbose: the verbosity level: 0 no output, 1 info, 2 debug
|
||||
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
|
||||
debug messages
|
||||
:param seed: Seed for the pseudo random generators
|
||||
:param device: Device (cpu, cuda, ...) on which the code should be run.
|
||||
Setting it to auto, the code will be run on the GPU if possible.
|
||||
|
|
|
|||
Loading…
Reference in a new issue