stable-baselines3/stable_baselines3/common/callbacks.py

import os
import warnings
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union

import gym
import numpy as np

from stable_baselines3.common import base_class, logger  # pytype: disable=pyi-error
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, sync_envs_normalization


class BaseCallback(ABC):
    """
    Abstract parent of every callback.

    Each public ``on_*`` hook performs the shared bookkeeping and then
    delegates to the matching ``_on_*`` method, which subclasses override.
    Only ``_on_step`` is mandatory.

    :param verbose: Verbosity level, stored as ``self.verbose``
    """

    def __init__(self, verbose: int = 0):
        super().__init__()
        self.verbose = verbose
        # The RL model (set by ``init_callback``)
        self.model: Optional["base_class.BaseAlgorithm"] = None
        # Shortcut for ``self.model.get_env()``: the environment used for training
        self.training_env: Union["gym.Env", "VecEnv", None] = None
        # Logger handle (set by ``init_callback``)
        self.logger = None
        # How many times ``on_step`` has been invoked on this callback
        self.n_calls: int = 0
        # Copy of ``model.num_timesteps`` taken at the last ``on_step``
        # (n_envs * number of times env.step() was called)
        self.num_timesteps: int = 0
        # Local / global variables handed over by the algorithm
        self.locals: Dict[str, Any] = {}
        self.globals: Dict[str, Any] = {}
        # For event callbacks it is sometimes handy to reach the parent object
        self.parent: Optional["BaseCallback"] = None

    # The model type is given as a string to avoid a circular import
    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
        """
        Bind the callback to ``model``: keep a reference to the model, to its
        training environment and to the logger module, then run the
        subclass-specific ``_init_callback`` hook.

        :param model: the RL model this callback is attached to
        """
        self.model = model
        self.training_env = model.get_env()
        self.logger = logger
        self._init_callback()

    def _init_callback(self) -> None:
        """Subclass hook run at the end of ``init_callback``; no-op by default."""
        pass

    def on_training_start(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
        """
        Store the caller's local and global variable dicts, then run
        ``_on_training_start``.

        :param locals_: local variables of the caller
        :param globals_: global variables of the caller
        """
        # Kept by reference (no copy), so later changes made by the caller are visible
        self.locals, self.globals = locals_, globals_
        self._on_training_start()

    def _on_training_start(self) -> None:
        """Subclass hook run by ``on_training_start``; no-op by default."""
        pass

    def on_rollout_start(self) -> None:
        """Run the ``_on_rollout_start`` hook."""
        self._on_rollout_start()

    def _on_rollout_start(self) -> None:
        """Subclass hook run by ``on_rollout_start``; no-op by default."""
        pass

    @abstractmethod
    def _on_step(self) -> bool:
        """
        Per-step hook that every concrete callback must implement.

        :return: If the callback returns False, training is aborted early.
        """
        return True

    def on_step(self) -> bool:
        """
        This method will be called by the model after each call to ``env.step()``.

        For child callback (of an ``EventCallback``), this will be called
        when the event is triggered.

        :return: If the callback returns False, training is aborted early.
        """
        self.n_calls = self.n_calls + 1
        # Mirror the model's step counter (timesteps start at zero)
        self.num_timesteps = self.model.num_timesteps
        return self._on_step()

    def on_training_end(self) -> None:
        """Run the ``_on_training_end`` hook."""
        self._on_training_end()

    def _on_training_end(self) -> None:
        """Subclass hook run by ``on_training_end``; no-op by default."""
        pass

    def on_rollout_end(self) -> None:
        """Run the ``_on_rollout_end`` hook."""
        self._on_rollout_end()

    def _on_rollout_end(self) -> None:
        """Subclass hook run by ``on_rollout_end``; no-op by default."""
        pass

    def update_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Merge ``locals_`` into the stored local variables and forward the
        same dict to ``update_child_locals``.

        :param locals_: the local variables during rollout collection
        """
        self.locals.update(locals_)
        self.update_child_locals(locals_)

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Hook for propagating the local variables to sub callbacks;
        does nothing in the base class.

        :param locals_: the local variables during rollout collection
        """
        pass


class EventCallback(BaseCallback):
    """
    Base class for callbacks that fire a child callback when some event occurs.

    :param callback: Optional child callback whose ``on_step`` is run
        each time ``_on_event`` is called.
    :param verbose: Verbosity level
    """

    def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.callback = callback
        # Let the child reach back to this object
        if self.callback is not None:
            self.callback.parent = self

    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
        """
        Initialize this callback, then the child callback (if any) with the same model.

        :param model: the RL model this callback is attached to
        """
        super().init_callback(model)
        if self.callback is None:
            return
        self.callback.init_callback(self.model)

    def _on_training_start(self) -> None:
        """Forward the stored locals/globals to the child's ``on_training_start``."""
        if self.callback is None:
            return
        self.callback.on_training_start(self.locals, self.globals)

    def _on_event(self) -> bool:
        """
        Trigger the child callback.

        :return: the child's ``on_step`` result, or True when there is no child.
        """
        return True if self.callback is None else self.callback.on_step()

    def _on_step(self) -> bool:
        """
        Default step hook: never interrupts training.

        :return: always True
        """
        return True

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Pass the local variables on to the child callback, if there is one.

        :param locals_: the local variables during rollout collection
        """
        if self.callback is None:
            return
        self.callback.update_locals(locals_)


class CallbackList(BaseCallback):
    """
    Container that fans every hook out to a list of callbacks, in order.

    :param callbacks: A list of callbacks that will be called
        sequentially.
    """

    def __init__(self, callbacks: List[BaseCallback]):
        super().__init__()
        assert isinstance(callbacks, list)
        self.callbacks = callbacks

    def _init_callback(self) -> None:
        """Initialize every wrapped callback with this list's model."""
        for cb in self.callbacks:
            cb.init_callback(self.model)

    def _on_training_start(self) -> None:
        """Forward the stored locals/globals to every wrapped callback."""
        for cb in self.callbacks:
            cb.on_training_start(self.locals, self.globals)

    def _on_rollout_start(self) -> None:
        """Notify every wrapped callback that a rollout starts."""
        for cb in self.callbacks:
            cb.on_rollout_start()

    def _on_step(self) -> bool:
        """
        Run ``on_step`` on every wrapped callback.

        All callbacks are always called, even after one of them asked to stop.

        :return: False (stop training) if at least one callback returns False.
        """
        keep_going = True
        for cb in self.callbacks:
            # The call sits on the left of ``and`` so it is never short-circuited away
            keep_going = cb.on_step() and keep_going
        return keep_going

    def _on_rollout_end(self) -> None:
        """Notify every wrapped callback that a rollout ended."""
        for cb in self.callbacks:
            cb.on_rollout_end()

    def _on_training_end(self) -> None:
        """Notify every wrapped callback that training ended."""
        for cb in self.callbacks:
            cb.on_training_end()

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Pass the local variables on to every wrapped callback.

        :param locals_: the local variables during rollout collection
        """
        for cb in self.callbacks:
            cb.update_locals(locals_)


class CheckpointCallback(BaseCallback):
    """
    Callback for saving a model every ``save_freq`` calls of the callback.

    :param save_freq: Save the model each time ``n_calls`` is a multiple of this value.
    :param save_path: Path to the folder where the model will be saved.
    :param name_prefix: Common prefix to the saved models
    :param verbose: Verbosity level; the save message is printed when it is above 1.
    """

    def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
        super().__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path
        self.name_prefix = name_prefix

    def _init_callback(self) -> None:
        """Make sure the destination folder exists (skipped when ``save_path`` is None)."""
        if self.save_path is None:
            return
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """
        Save a checkpoint when the call counter hits a multiple of ``save_freq``.

        :return: always True, this callback never stops training.
        """
        if self.n_calls % self.save_freq != 0:
            return True

        # File name embeds the current number of timesteps
        path = os.path.join(self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps")
        self.model.save(path)
        if self.verbose > 1:
            print(f"Saving model checkpoint to {path}")
        return True


class ConvertCallback(BaseCallback):
    """
    Adapter that wraps a functional (old-style) callback into a callback object.

    :param callback: Function called at each step with the stored
        local and global variables; its return value is returned by ``_on_step``.
    :param verbose: Verbosity level
    """

    def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
        super().__init__(verbose)
        self.callback = callback

    def _on_step(self) -> bool:
        """
        Call the wrapped function with ``self.locals`` and ``self.globals``.

        :return: the function's result, or True when no function is set.
        """
        if self.callback is None:
            return True
        return self.callback(self.locals, self.globals)


class EvalCallback(EventCallback):
    """
    Callback for evaluating an agent.

    :param eval_env: The environment used for evaluation. It is wrapped in a
        ``DummyVecEnv`` if it is not already a ``VecEnv`` and must contain
        exactly one environment.
    :param callback_on_new_best: Callback to trigger
        when there is a new best model according to the ``mean_reward``
    :param n_eval_episodes: The number of episodes to test the agent
    :param eval_freq: Evaluate the agent every eval_freq call of the callback.
        Evaluation is disabled when it is not strictly positive.
    :param log_path: Path to a folder where the evaluations (``evaluations.npz``)
        will be saved. It will be updated at each evaluation.
    :param best_model_save_path: Path to a folder where the best model
        according to performance on the eval env will be saved.
    :param deterministic: Whether the evaluation should
        use a stochastic or deterministic actions.
    :param render: Whether to render or not the environment during evaluation
    :param verbose: Verbosity level; evaluation results are printed when it is above 0.
    :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
        wrapped with a Monitor wrapper)
    """

    def __init__(
        self,
        eval_env: Union[gym.Env, VecEnv],
        callback_on_new_best: Optional[BaseCallback] = None,
        n_eval_episodes: int = 5,
        eval_freq: int = 10000,
        log_path: Optional[str] = None,
        best_model_save_path: Optional[str] = None,
        deterministic: bool = True,
        render: bool = False,
        verbose: int = 1,
        warn: bool = True,
    ):
        super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose)
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.best_mean_reward = -np.inf
        self.last_mean_reward = -np.inf
        self.deterministic = deterministic
        self.render = render
        self.warn = warn

        # Convert to VecEnv for consistency
        if not isinstance(eval_env, VecEnv):
            eval_env = DummyVecEnv([lambda: eval_env])

        # From here on ``eval_env`` is always a VecEnv
        assert eval_env.num_envs == 1, "You must pass only one environment for evaluation"

        self.eval_env = eval_env
        self.best_model_save_path = best_model_save_path
        # Logs will be written in ``evaluations.npz``
        if log_path is not None:
            log_path = os.path.join(log_path, "evaluations")
        self.log_path = log_path
        # History of all evaluations, one entry per evaluation round
        self.evaluations_results = []
        self.evaluations_timesteps = []
        self.evaluations_length = []
        # For computing success rate
        self._is_success_buffer = []
        self.evaluations_successes = []

    def _init_callback(self) -> None:
        """
        Warn when training and evaluation environments differ in type and
        create the output folders that were requested.
        """
        # Does not work in some corner cases, where the wrapper is not the same
        if not isinstance(self.training_env, type(self.eval_env)):
            # Trailing space keeps the two concatenated literals from running together
            warnings.warn("Training and eval env are not of the same type " f"{self.training_env} != {self.eval_env}")

        # Create folders if needed
        if self.best_model_save_path is not None:
            os.makedirs(self.best_model_save_path, exist_ok=True)
        if self.log_path is not None:
            # ``log_path`` points at the file stem, so create its parent folder
            os.makedirs(os.path.dirname(self.log_path), exist_ok=True)

    def _log_success_callback(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
        """
        Callback passed to the  ``evaluate_policy`` function
        in order to log the success rate (when applicable),
        for instance when using HER.

        When ``locals_["done"]`` is true and the info dict has an
        ``"is_success"`` entry, that entry is appended to the success buffer.

        :param locals_: local variables of the caller; ``"info"`` and ``"done"`` are read
        :param globals_: global variables of the caller (unused)
        """
        info = locals_["info"]
        # VecEnv: unpack
        if not isinstance(info, dict):
            info = info[0]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

    def _on_step(self) -> bool:
        """
        Every ``eval_freq`` calls: evaluate the model, optionally write the
        results to ``log_path``, record the metrics, and save the model and
        trigger the child callback when the mean reward is a new best.

        :return: the child callback's result when a new best is reached and a
            child callback is set, True otherwise.
        """
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Sync training and eval env if there is VecNormalize
            sync_envs_normalization(self.training_env, self.eval_env)

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            if self.log_path is not None:
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # The whole history is rewritten at each evaluation
                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = mean_reward

            if self.verbose > 0:
                print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose > 0:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            if mean_reward > self.best_mean_reward:
                if self.verbose > 0:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = mean_reward
                # Trigger callback if needed
                if self.callback is not None:
                    return self._on_event()

        return True

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Update the references to the local variables.

        :param locals_: the local variables during rollout collection
        """
        if self.callback:
            self.callback.update_locals(locals_)


class StopTrainingOnRewardThreshold(BaseCallback):
    """
    Stop the training once a threshold in episodic reward
    has been reached (i.e. when the model is good enough).

    It must be used with the ``EvalCallback``.

    :param reward_threshold:  Minimum expected reward per episode
        to stop training.
    :param verbose: Verbosity level; the stop message is printed when it is above 0.
    """

    def __init__(self, reward_threshold: float, verbose: int = 0):
        super(StopTrainingOnRewardThreshold, self).__init__(verbose=verbose)
        self.reward_threshold = reward_threshold

    def _on_step(self) -> bool:
        """
        Compare the parent's ``best_mean_reward`` with the threshold.

        :return: True while the best mean reward is strictly below the
            threshold, False (stop training) otherwise.
        """
        assert self.parent is not None, "``StopTrainingOnRewardThreshold`` callback must be used with an ``EvalCallback``"
        # Convert np.bool_ to bool, otherwise callback() is False won't work
        continue_training = bool(self.parent.best_mean_reward < self.reward_threshold)
        if self.verbose > 0 and not continue_training:
            print(
                f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} "
                f"is above the threshold {self.reward_threshold}"
            )
        return continue_training


class EveryNTimesteps(EventCallback):
    """
    Fire a child callback each time at least ``n_steps`` timesteps
    have elapsed since the previous trigger.

    :param n_steps: Number of timesteps between two trigger.
    :param callback: Callback that will be called
        when the event is triggered.
    """

    def __init__(self, n_steps: int, callback: BaseCallback):
        super().__init__(callback)
        self.n_steps = n_steps
        # Timestep count recorded at the most recent trigger
        self.last_time_trigger = 0

    def _on_step(self) -> bool:
        """
        Trigger the child callback once enough timesteps have gone by.

        :return: the child callback's result when triggered, True otherwise.
        """
        steps_since_trigger = self.num_timesteps - self.last_time_trigger
        if steps_since_trigger < self.n_steps:
            return True
        self.last_time_trigger = self.num_timesteps
        return self._on_event()


class StopTrainingOnMaxEpisodes(BaseCallback):
    """
    Stop the training once a maximum number of episodes are played.

    For multiple environments presumes that, the desired behavior is that the agent trains on each env for ``max_episodes``
    and in total for ``max_episodes * n_envs`` episodes.

    :param max_episodes: Maximum number of episodes to stop training.
    :param verbose: Select whether to print information about when training ended by reaching ``max_episodes``
    """

    def __init__(self, max_episodes: int, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.max_episodes = max_episodes
        # Rescaled by the number of environments in ``_init_callback``
        self._total_max_episodes = max_episodes
        self.n_episodes = 0

    def _init_callback(self) -> None:
        """Scale the episode budget by the number of training environments."""
        n_envs = self.training_env.num_envs
        self._total_max_episodes = self.max_episodes * n_envs

    def _on_step(self) -> bool:
        """
        Add the episodes that just finished to the counter and compare it
        with the total budget.

        :return: True while fewer than ``max_episodes * n_envs`` episodes
            have been counted, False (stop training) otherwise.
        """
        # Both the 'done' and 'dones' keywords are checked because:
        # some models use keyword 'done' (e.g.,: SAC, TD3, DQN, DDPG)
        # while some models use keyword 'dones' (e.g.,: A2C, PPO)
        done_flags = self.locals.get("done")
        if done_flags is None:
            done_flags = self.locals.get("dones")
        self.n_episodes += np.sum(np.array(done_flags)).item()

        continue_training = self.n_episodes < self._total_max_episodes
        if continue_training or self.verbose <= 0:
            return continue_training

        # Training stops and verbosity is on: report why
        mean_episodes_per_env = self.n_episodes / self.training_env.num_envs
        if self.training_env.num_envs > 1:
            mean_ep_str = f"with an average of {mean_episodes_per_env:.2f} episodes per env"
        else:
            mean_ep_str = ""

        print(
            f"Stopping training with a total of {self.num_timesteps} steps because the "
            f"{self.locals.get('tb_log_name')} model reached max_episodes={self.max_episodes}, "
            f"by playing for {self.n_episodes} episodes "
            f"{mean_ep_str}"
        )
        return continue_training
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								import os
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								import warnings
 								from abc import ABC, abstractmethod
-												Improve typing coverage (#175)

* Improve typing coverage

* Even more types

* Fixes

* Update changelog

* Unified docstrings

* Improve error messages for unsupported spaces
											
										
										
											2020-10-07 08:51:49 +00:00
+								from typing import Any, Callable, Dict, List, Optional, Union
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								import gym
 								import numpy as np
-												Update docs (custom policy, type hints) (#167)

* Change import

* Update custom policy doc

* Re-enable sphinx_autodoc_typehints

* Update docker image

* Attempt to fix read the doc build error

* Add sphinx_autodoc_typehints to read the doc env

* Fix pip version

* Add full custom policy example

* Fix
											
										
										
											2020-09-29 17:41:14 +00:00
+								from stable_baselines3.common import base_class, logger  # pytype: disable=pyi-error
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								from stable_baselines3.common.evaluation import evaluate_policy
 								from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, sync_envs_normalization
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								class BaseCallback(ABC):
 								    """
 								    Base class for callback.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param verbose:
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    """
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    def __init__(self, verbose: int = 0):
 								        super(BaseCallback, self).__init__()
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        # The RL model
-												Update docs (custom policy, type hints) (#167)

* Change import

* Update custom policy doc

* Re-enable sphinx_autodoc_typehints

* Update docker image

* Attempt to fix read the doc build error

* Add sphinx_autodoc_typehints to read the doc env

* Fix pip version

* Add full custom policy example

* Fix
											
										
										
											2020-09-29 17:41:14 +00:00
+								        self.model = None  # type: Optional[base_class.BaseAlgorithm]
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        # An alias for self.model.get_env(), the environment used for training
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        self.training_env = None  # type: Union[gym.Env, VecEnv, None]
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        # Number of time the callback was called
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        self.n_calls = 0  # type: int
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        # n_envs * n times env.step() was called
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        self.num_timesteps = 0  # type: int
 								        self.verbose = verbose
-												Callbacks have access to locals (#115)

* callbacks have access to locals

* changeloc

* doc

* callbacks have access to locals

* changeloc

* doc

* Added update function for child callbacks

* Pre-Release 0.8.0 (#134)

* Fix double reset and improve typing coverage (#136)

* Fix double reset and improve typing coverage

* Revert minor edit

* Add doc about types

* Update child callbacks

* cleaned imports

* format

* import order

* Simplify tests and add comments

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-08-23 12:34:01 +00:00
+								        self.locals: Dict[str, Any] = {}
 								        self.globals: Dict[str, Any] = {}
-												Tensorboard integration (#30)

* init commit tensorboard-integration

* Added tb logger to ppo (with output exclusions)

* fixed truncated stdout

* categorize stdout outputs by tag

* separated exclusions from values, added missing logs

* saving exclusions as dict instead of list

* reformatting, auto run indexing

* included renaming suggestions, fixed tests

* tb support for sac

* linting

* moved logging to base class

* tb support for td3

* removed histograms, non-verbose output working

* modifed changelog

* linting

* fixed type error

* moved logger config to utils

* removed episode_rewards log from ppo

* Enable tensorboard in tests

* Remove unused import

* Update logger sub titles

* Minor edit for PPO

* Update logger and tb log folder

* Pass correct logger to Callbacks

* updated docs

* added tb example image to docs

* add support for continuing training in tensorboard

* added tensorboard to docs index

* added tb test

* moved logger config to _setup_learn, updated tests

* accessing verbose from base class

* Update doc and tests

* Rename session -> time

* Update version

* Update logger truncate

* Update types

* Remove duplicated code

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-06-01 09:55:44 +00:00
+								        self.logger = None
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        # Sometimes, for event callback, it is useful
 								        # to have access to the parent object
 								        self.parent = None  # type: Optional[BaseCallback]
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
+								    # Type hint as string to avoid circular import
-												Update docs (custom policy, type hints) (#167)

* Change import

* Update custom policy doc

* Re-enable sphinx_autodoc_typehints

* Update docker image

* Attempt to fix read the doc build error

* Add sphinx_autodoc_typehints to read the doc env

* Fix pip version

* Add full custom policy example

* Fix
											
										
										
											2020-09-29 17:41:14 +00:00
+								    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        """
 								        Initialize the callback by saving references to the
 								        RL model and the training environment for convenience.
 								        """
 								        self.model = model
 								        self.training_env = model.get_env()
-												Tensorboard integration (#30)

* init commit tensorboard-integration

* Added tb logger to ppo (with output exclusions)

* fixed truncated stdout

* categorize stdout outputs by tag

* separated exclusions from values, added missing logs

* saving exclusions as dict instead of list

* reformatting, auto run indexing

* included renaming suggestions, fixed tests

* tb support for sac

* linting

* moved logging to base class

* tb support for td3

* removed histograms, non-verbose output working

* modifed changelog

* linting

* fixed type error

* moved logger config to utils

* removed episode_rewards log from ppo

* Enable tensorboard in tests

* Remove unused import

* Update logger sub titles

* Minor edit for PPO

* Update logger and tb log folder

* Pass correct logger to Callbacks

* updated docs

* added tb example image to docs

* add support for continuing training in tensorboard

* added tensorboard to docs index

* added tb test

* moved logger config to _setup_learn, updated tests

* accessing verbose from base class

* Update doc and tests

* Rename session -> time

* Update version

* Update logger truncate

* Update types

* Remove duplicated code

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-06-01 09:55:44 +00:00
+								        self.logger = logger
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        self._init_callback()
 								    def _init_callback(self) -> None:
 								        pass
 								    def on_training_start(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
 								        # Those are reference and will be updated automatically
 								        self.locals = locals_
 								        self.globals = globals_
 								        self._on_training_start()
 								    def _on_training_start(self) -> None:
 								        pass
 								    def on_rollout_start(self) -> None:
 								        self._on_rollout_start()
 								    def _on_rollout_start(self) -> None:
 								        pass
 								    @abstractmethod
 								    def _on_step(self) -> bool:
 								        """
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								        :return: If the callback returns False, training is aborted early.
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        """
 								        return True
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								    def on_step(self) -> bool:
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        """
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        This method will be called by the model after each call to ``env.step()``.
 								        For child callback (of an ``EventCallback``), this will be called
 								        when the event is triggered.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								        :return: If the callback returns False, training is aborted early.
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        """
 								        self.n_calls += 1
 								        # timesteps start at zero
-												Add StopTrainingOnMaxEpisodes to callback collection (#147)

* Add StopTrainingOnMaxEpisodes class to pre-made callback collection

* Adjust instant when counters are incremented for both OnPolicy and OffPolicy algorithms

* Improv to StopTrainingOnMaxEpisodes including output, tests and doc

* Improv StopTrainingOnMaxEpisodes callback running _init_callback

* Update callbacks.py

* Update test_callbacks.py

* Fix style

* Update changelog.rst

* Fix test

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
											
										
										
											2020-08-28 09:36:33 +00:00
+								        self.num_timesteps = self.model.num_timesteps
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								        return self._on_step()
 								    def on_training_end(self) -> None:
 								        """Forward the training-end notification to ``_on_training_end``."""
 								        self._on_training_end()
 								    def _on_training_end(self) -> None:
 								        pass
 								    def on_rollout_end(self) -> None:
 								        """Forward the rollout-end notification to ``_on_rollout_end``."""
 								        self._on_rollout_end()
 								    def _on_rollout_end(self) -> None:
 								        pass
-												Callbacks have access to locals (#115)

* callbacks have access to locals

* changeloc

* doc

* callbacks have access to locals

* changeloc

* doc

* Added update function for child callbacks

* Pre-Release 0.8.0 (#134)

* Fix double reset and improve typing coverage (#136)

* Fix double reset and improve typing coverage

* Revert minor edit

* Add doc about types

* Update child callbacks

* cleaned imports

* format

* import order

* Simplify tests and add comments

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-08-23 12:34:01 +00:00
+								    def update_locals(self, locals_: Dict[str, Any]) -> None:
 								        """
 								        Update the references to the local variables.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								        :param locals_: the local variables during rollout collection
-												Callbacks have access to locals (#115)

* callbacks have access to locals

* changeloc

* doc

* callbacks have access to locals

* changeloc

* doc

* Added update function for child callbacks

* Pre-Release 0.8.0 (#134)

* Fix double reset and improve typing coverage (#136)

* Fix double reset and improve typing coverage

* Revert minor edit

* Add doc about types

* Update child callbacks

* cleaned imports

* format

* import order

* Simplify tests and add comments

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-08-23 12:34:01 +00:00
+								        """
 								        self.locals.update(locals_)
 								        self.update_child_locals(locals_)
 								    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
 								        """
 								        Update the references to the local variables on sub callbacks.

 								        This implementation does nothing.

 								        :param locals_: the local variables during rollout collection
 								        """
 								        pass
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								class EventCallback(BaseCallback):
 								    """
 								    Base class for triggering callback on event.

 								    :param callback: Callback that will be called
 								        when an event is triggered.
 								    :param verbose: Forwarded unchanged to ``BaseCallback.__init__``.
 								    """

 								    def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
 								        super(EventCallback, self).__init__(verbose=verbose)
 								        self.callback = callback
 								        # Give access to the parent
 								        if callback is not None:
 								            self.callback.parent = self

 								    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
 								        """
 								        Initialize this callback, then the child callback (if any)
 								        with the same model.

 								        :param model: the model passed on to ``BaseCallback.init_callback``
 								        """
 								        super(EventCallback, self).init_callback(model)
 								        if self.callback is not None:
 								            self.callback.init_callback(self.model)

 								    def _on_training_start(self) -> None:
 								        # Share this callback's locals/globals with the child callback.
 								        if self.callback is not None:
 								            self.callback.on_training_start(self.locals, self.globals)

 								    def _on_event(self) -> bool:
 								        """
 								        Trigger the child callback.

 								        :return: the value of the child's ``on_step()``, or True
 								            when there is no child callback.
 								        """
 								        if self.callback is not None:
 								            return self.callback.on_step()
 								        return True

 								    def _on_step(self) -> bool:
 								        # Always returns True here.
 								        return True

 								    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
 								        """
 								        Update the references to the local variables.

 								        :param locals_: the local variables during rollout collection
 								        """
 								        if self.callback is not None:
 								            self.callback.update_locals(locals_)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								class CallbackList(BaseCallback):
 								    """
 								    Class for chaining callbacks.

 								    :param callbacks: A list of callbacks that will be called
 								        sequentially.
 								    """

 								    def __init__(self, callbacks: List[BaseCallback]):
 								        super(CallbackList, self).__init__()
 								        assert isinstance(callbacks, list)
 								        self.callbacks = callbacks

 								    def _init_callback(self) -> None:
 								        # Every chained callback is initialized with the same model.
 								        for callback in self.callbacks:
 								            callback.init_callback(self.model)

 								    def _on_training_start(self) -> None:
 								        for callback in self.callbacks:
 								            callback.on_training_start(self.locals, self.globals)

 								    def _on_rollout_start(self) -> None:
 								        for callback in self.callbacks:
 								            callback.on_rollout_start()

 								    def _on_step(self) -> bool:
 								        """
 								        Call ``on_step()`` on every chained callback.

 								        :return: False if at least one callback returned False.
 								        """
 								        continue_training = True
 								        for callback in self.callbacks:
 								            # Return False (stop training) if at least one callback returns False.
 								            # ``callback.on_step()`` is the left operand, so it is evaluated
 								            # for every callback even after one has already requested a stop.
 								            continue_training = callback.on_step() and continue_training
 								        return continue_training

 								    def _on_rollout_end(self) -> None:
 								        for callback in self.callbacks:
 								            callback.on_rollout_end()

 								    def _on_training_end(self) -> None:
 								        for callback in self.callbacks:
 								            callback.on_training_end()

 								    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
 								        """
 								        Update the references to the local variables.

 								        :param locals_: the local variables during rollout collection
 								        """
 								        for callback in self.callbacks:
 								            callback.update_locals(locals_)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								class CheckpointCallback(BaseCallback):
 								    """
 								    Callback for saving a model every ``save_freq`` steps

 								    :param save_freq: A checkpoint is written each time the number of
 								        calls to the callback (``n_calls``) is a multiple of this value.
 								    :param save_path: Path to the folder where the model will be saved.
 								    :param name_prefix: Common prefix to the saved models
 								    :param verbose: A message is printed for each checkpoint
 								        when ``verbose > 1``.
 								    """

 								    def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
 								        super(CheckpointCallback, self).__init__(verbose)
 								        self.save_freq = save_freq
 								        self.save_path = save_path
 								        self.name_prefix = name_prefix

 								    def _init_callback(self) -> None:
 								        # Create folder if needed
 								        if self.save_path is not None:
 								            os.makedirs(self.save_path, exist_ok=True)

 								    def _on_step(self) -> bool:
 								        """
 								        Save the model to
 								        ``<save_path>/<name_prefix>_<num_timesteps>_steps`` when
 								        ``n_calls`` is a multiple of ``save_freq``.

 								        :return: always True.
 								        """
 								        if self.n_calls % self.save_freq == 0:
 								            path = os.path.join(self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps")
 								            self.model.save(path)
 								            if self.verbose > 1:
 								                print(f"Saving model checkpoint to {path}")
 								        return True
 								class ConvertCallback(BaseCallback):
 								    """
 								    Convert functional callback (old-style) to object.

 								    :param callback: Function called at each step as
 								        ``callback(self.locals, self.globals)``; its return value is
 								        returned by ``_on_step``.
 								    :param verbose: Forwarded unchanged to ``BaseCallback.__init__``.
 								    """

 								    def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
 								        super(ConvertCallback, self).__init__(verbose)
 								        self.callback = callback

 								    def _on_step(self) -> bool:
 								        """
 								        :return: the wrapped function's return value, or True when
 								            ``self.callback`` is None.
 								        """
 								        if self.callback is not None:
 								            return self.callback(self.locals, self.globals)
 								        return True
 								class EvalCallback(EventCallback):
 								    """
 								    Callback for evaluating an agent.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param eval_env: The environment used for initialization
 								    :param callback_on_new_best: Callback to trigger
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        when there is a new best model according to the ``mean_reward``
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param n_eval_episodes: The number of episodes to test the agent
 								    :param eval_freq: Evaluate the agent every eval_freq call of the callback.
 								    :param log_path: Path to a folder where the evaluations (``evaluations.npz``)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        will be saved. It will be updated at each evaluation.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param best_model_save_path: Path to a folder where the best model
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        according to performance on the eval env will be saved.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param deterministic: Whether the evaluation should
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        use a stochastic or deterministic actions.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param render: Whether to render or not the environment during evaluation
 								    :param verbose:
-												Use Monitor episode reward/length for `evaluate_policy` (#220)

* Update evaluate_policy to use monitor data if available

* Update documentation

* Cleaning up

* Remove unnecessary typing trickery

* Update doc

* Rename is_wrapped to clarify it is for vecenvs

* Add is_wrapped for regular envs

* Add is_wrapped call for subprocvecenv and update code for circular imports

* Move new functions back to env_util and fix imports

* Update changelog

* Clarify evaluate_policy docs

* Add tests for wrapped modifying episode lengths

* Fix tests

* Update changelog

* Minor edits

* Add warn switch to evaluate_policy and update tests

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-11-16 10:52:28 +00:00
+								    :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
 								        wrapped with a Monitor wrapper)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    """
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
 								    def __init__(
 								        self,
 								        eval_env: Union[gym.Env, VecEnv],
 								        callback_on_new_best: Optional[BaseCallback] = None,
 								        n_eval_episodes: int = 5,
 								        eval_freq: int = 10000,
 								        log_path: Optional[str] = None,
 								        best_model_save_path: Optional[str] = None,
 								        deterministic: bool = True,
 								        render: bool = False,
 								        verbose: int = 1,
 								        warn: bool = True,
 								    ):
 								        # ``callback_on_new_best`` becomes the child callback (``self.callback``).
 								        super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose)
 								        self.n_eval_episodes = n_eval_episodes
 								        self.eval_freq = eval_freq
 								        # Both rewards start at -inf.
 								        self.best_mean_reward = -np.inf
 								        self.last_mean_reward = -np.inf
 								        self.deterministic = deterministic
 								        self.render = render
 								        self.warn = warn

 								        # Convert to VecEnv for consistency
 								        if not isinstance(eval_env, VecEnv):
 								            eval_env = DummyVecEnv([lambda: eval_env])

 								        if isinstance(eval_env, VecEnv):
 								            assert eval_env.num_envs == 1, "You must pass only one environment for evaluation"

 								        self.eval_env = eval_env
 								        self.best_model_save_path = best_model_save_path
 								        # Logs will be written in ``evaluations.npz``
 								        if log_path is not None:
 								            log_path = os.path.join(log_path, "evaluations")
 								        self.log_path = log_path
 								        self.evaluations_results = []
 								        self.evaluations_timesteps = []
 								        self.evaluations_length = []
 								        # For computing success rate
 								        self._is_success_buffer = []
 								        self.evaluations_successes = []
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
-												Improve typing coverage (#175)

* Improve typing coverage

* Even more types

* Fixes

* Update changelog

* Unified docstrings

* Improve error messages for unsupported spaces
											
										
										
											2020-10-07 08:51:49 +00:00
+								    def _init_callback(self) -> None:
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								        # Does not work in some corner cases, where the wrapper is not the same
-												Documentation update and style fixes (#21)

* Update doc: add gSDE

* Fix codestyle

* Remove travis script

* Add lint check to gitlab
											
										
										
											2020-05-15 11:54:06 +00:00
+								        if not isinstance(self.training_env, type(self.eval_env)):
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								            warnings.warn("Training and eval env are not of the same type" f"{self.training_env} != {self.eval_env}")
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								        # Create folders if needed
 								        if self.best_model_save_path is not None:
 								            os.makedirs(self.best_model_save_path, exist_ok=True)
 								        if self.log_path is not None:
 								            os.makedirs(os.path.dirname(self.log_path), exist_ok=True)
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
+								    def _log_success_callback(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
 								        """
 								        Callback passed to the  ``evaluate_policy`` function
 								        in order to log the success rate (when applicable),
 								        for instance when using HER.
 								        :param locals_:
 								        :param globals_:
 								        """
 								        info = locals_["info"]
 								        # VecEnv: unpack
 								        if not isinstance(info, dict):
 								            info = info[0]
 								        if locals_["done"]:
 								            maybe_is_success = info.get("is_success")
 								            if maybe_is_success is not None:
 								                self._is_success_buffer.append(maybe_is_success)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    def _on_step(self) -> bool:
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
+								        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								            # Sync training and eval env if there is VecNormalize
 								            sync_envs_normalization(self.training_env, self.eval_env)
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
+								            # Reset success rate buffer
 								            self._is_success_buffer = []
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								            episode_rewards, episode_lengths = evaluate_policy(
 								                self.model,
 								                self.eval_env,
 								                n_eval_episodes=self.n_eval_episodes,
 								                render=self.render,
 								                deterministic=self.deterministic,
 								                return_episode_rewards=True,
-												Use Monitor episode reward/length for `evaluate_policy` (#220)

* Update evaluate_policy to use monitor data if available

* Update documentation

* Cleaning up

* Remove unnecessary typing trickery

* Update doc

* Rename is_wrapped to clarify it is for vecenvs

* Add is_wrapped for regular envs

* Add is_wrapped call for subprocvecenv and update code for circular imports

* Move new functions back to env_util and fix imports

* Update changelog

* Clarify evaluate_policy docs

* Add tests for wrapped modifying episode lengths

* Fix tests

* Update changelog

* Minor edits

* Add warn switch to evaluate_policy and update tests

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-11-16 10:52:28 +00:00
+								                warn=self.warn,
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
+								                callback=self._log_success_callback,
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								            )
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								            if self.log_path is not None:
 								                self.evaluations_timesteps.append(self.num_timesteps)
 								                self.evaluations_results.append(episode_rewards)
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
+								                self.evaluations_length.append(episode_lengths)
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
 								                kwargs = {}
 								                # Save success log if present
 								                if len(self._is_success_buffer) > 0:
 								                    self.evaluations_successes.append(self._is_success_buffer)
 								                    kwargs = dict(successes=self.evaluations_successes)
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								                np.savez(
 								                    self.log_path,
 								                    timesteps=self.evaluations_timesteps,
 								                    results=self.evaluations_results,
 								                    ep_lengths=self.evaluations_length,
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
+								                    **kwargs,
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								                )
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
+								            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
-												Save last mean reward

											
										
										
											2020-02-11 12:22:44 +00:00
+								            self.last_mean_reward = mean_reward
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								            if self.verbose > 0:
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								                print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
-												Refactor evaluation

											
										
										
											2020-01-27 14:53:27 +00:00
+								                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
-												Tensorboard integration (#30)

* init commit tensorboard-integration

* Added tb logger to ppo (with output exclusions)

* fixed truncated stdout

* categorize stdout outputs by tag

* separated exclusions from values, added missing logs

* saving exclusions as dict instead of list

* reformatting, auto run indexing

* included renaming suggestions, fixed tests

* tb support for sac

* linting

* moved logging to base class

* tb support for td3

* removed histograms, non-verbose output working

* modifed changelog

* linting

* fixed type error

* moved logger config to utils

* removed episode_rewards log from ppo

* Enable tensorboard in tests

* Remove unused import

* Update logger sub titles

* Minor edit for PPO

* Update logger and tb log folder

* Pass correct logger to Callbacks

* updated docs

* added tb example image to docs

* add support for continuing training in tensorboard

* added tensorboard to docs index

* added tb test

* moved logger config to _setup_learn, updated tests

* accessing verbose from base class

* Update doc and tests

* Rename session -> time

* Update version

* Update logger truncate

* Update types

* Remove duplicated code

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-06-01 09:55:44 +00:00
+								            # Add to current Logger
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								            self.logger.record("eval/mean_reward", float(mean_reward))
 								            self.logger.record("eval/mean_ep_length", mean_ep_length)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
-												Add eval success rate logging (#255)

* Add eval success rate logging

* Fix name clash

* Log data

* Bump version
											
										
										
											2020-12-08 14:49:07 +00:00
+								            if len(self._is_success_buffer) > 0:
 								                success_rate = np.mean(self._is_success_buffer)
 								                if self.verbose > 0:
 								                    print(f"Success rate: {100 * success_rate:.2f}%")
 								                self.logger.record("eval/success_rate", success_rate)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								            if mean_reward > self.best_mean_reward:
 								                if self.verbose > 0:
 								                    print("New best mean reward!")
 								                if self.best_model_save_path is not None:
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								                self.best_mean_reward = mean_reward
 								                # Trigger callback if needed
 								                if self.callback is not None:
 								                    return self._on_event()
 								        return True
-												Callbacks have access to locals (#115)

* callbacks have access to locals

* changeloc

* doc

* callbacks have access to locals

* changeloc

* doc

* Added update function for child callbacks

* Pre-Release 0.8.0 (#134)

* Fix double reset and improve typing coverage (#136)

* Fix double reset and improve typing coverage

* Revert minor edit

* Add doc about types

* Update child callbacks

* cleaned imports

* format

* import order

* Simplify tests and add comments

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-08-23 12:34:01 +00:00
+								    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
 								        """
 								        Update the references to the local variables.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								        :param locals_: the local variables during rollout collection
-												Callbacks have access to locals (#115)

* callbacks have access to locals

* changeloc

* doc

* callbacks have access to locals

* changeloc

* doc

* Added update function for child callbacks

* Pre-Release 0.8.0 (#134)

* Fix double reset and improve typing coverage (#136)

* Fix double reset and improve typing coverage

* Revert minor edit

* Add doc about types

* Update child callbacks

* cleaned imports

* format

* import order

* Simplify tests and add comments

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
											
										
										
											2020-08-23 12:34:01 +00:00
+								        """
 								        if self.callback:
 								            self.callback.update_locals(locals_)
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
 								class StopTrainingOnRewardThreshold(BaseCallback):
 								    """
 								    Stop the training once a threshold in episodic reward
 								    has been reached (i.e. when the model is good enough).
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								    It must be used with the ``EvalCallback``.
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param reward_threshold:  Minimum expected reward per episode
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        to stop training.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param verbose:
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    """
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    def __init__(self, reward_threshold: float, verbose: int = 0):
 								        super(StopTrainingOnRewardThreshold, self).__init__(verbose=verbose)
 								        self.reward_threshold = reward_threshold
 								    def _on_step(self) -> bool:
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								        assert self.parent is not None, "``StopTrainingOnMinimumReward`` callback must be used " "with an ``EvalCallback``"
-												Fix numpy warning and update migration guide (#307)


											
										
										
											2021-02-01 10:24:44 +00:00
+								        # Convert np.bool_ to bool, otherwise callback() is False won't work
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        continue_training = bool(self.parent.best_mean_reward < self.reward_threshold)
 								        if self.verbose > 0 and not continue_training:
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
+								            print(
 								                f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} "
 								                f" is above the threshold {self.reward_threshold}"
 								            )
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        return continue_training
 								class EveryNTimesteps(EventCallback):
 								    """
-												Sync callbacks

											
										
										
											2020-03-12 11:34:25 +00:00
+								    Trigger a callback every ``n_steps`` timesteps
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param n_steps: Number of timesteps between two trigger.
 								    :param callback: Callback that will be called
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								        when the event is triggered.
 								    """
-												Auto-formatting with black and isort (#97)

* Add auto formatting with black and isort

* Reformat code

* Ignore typing errors

* Add note about line length

* Add minimum version for isort

* Add commit-checks

* Update docker image

* Fixed lost import (during last merge)

* Fix opencv dependency
											
										
										
											2020-07-16 14:12:16 +00:00
-												Add callback support

											
										
										
											2020-01-27 13:32:31 +00:00
+								    def __init__(self, n_steps: int, callback: BaseCallback):
 								        super(EveryNTimesteps, self).__init__(callback)
 								        self.n_steps = n_steps
 								        self.last_time_trigger = 0
 								    def _on_step(self) -> bool:
 								        if (self.num_timesteps - self.last_time_trigger) >= self.n_steps:
 								            self.last_time_trigger = self.num_timesteps
 								            return self._on_event()
 								        return True
-												Add StopTrainingOnMaxEpisodes to callback collection (#147)

* Add StopTrainingOnMaxEpisodes class to pre-made callback collection

* Adjust instant when counters are incremented for both OnPolicy and OffPolicy algorithms

* Improv to StopTrainingOnMaxEpisodes including output, tests and doc

* Improv StopTrainingOnMaxEpisodes callback running _init_callback

* Update callbacks.py

* Update test_callbacks.py

* Fix style

* Update changelog.rst

* Fix test

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
											
										
										
											2020-08-28 09:36:33 +00:00
 								class StopTrainingOnMaxEpisodes(BaseCallback):
 								    """
 								    Stop the training once a maximum number of episodes are played.
 								    For multiple environments presumes that, the desired behavior is that the agent trains on each env for ``max_episodes``
 								    and in total for ``max_episodes * n_envs`` episodes.
-												Cleanup docstring types (#169)

* Cleanup docstring types

* Update style

* Test with js hack

* Revert "Test with js hack"

This reverts commit d091f438e8851ab8d01b66628e06a104f5e5ec69.

* Fix types

* Fix typo

* Update CONTRIBUTING example
											
										
										
											2020-10-02 17:05:55 +00:00
+								    :param max_episodes: Maximum number of episodes to stop training.
 								    :param verbose: Select whether to print information about when training ended by reaching ``max_episodes``
-												Add StopTrainingOnMaxEpisodes to callback collection (#147)

* Add StopTrainingOnMaxEpisodes class to pre-made callback collection

* Adjust instant when counters are incremented for both OnPolicy and OffPolicy algorithms

* Improv to StopTrainingOnMaxEpisodes including output, tests and doc

* Improv StopTrainingOnMaxEpisodes callback running _init_callback

* Update callbacks.py

* Update test_callbacks.py

* Fix style

* Update changelog.rst

* Fix test

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
											
										
										
											2020-08-28 09:36:33 +00:00
+								    """
 								    def __init__(self, max_episodes: int, verbose: int = 0):
 								        super(StopTrainingOnMaxEpisodes, self).__init__(verbose=verbose)
 								        self.max_episodes = max_episodes
 								        self._total_max_episodes = max_episodes
 								        self.n_episodes = 0
-												Improve typing coverage (#175)

* Improve typing coverage

* Even more types

* Fixes

* Update changelog

* Unified docstrings

* Improve error messages for unsupported spaces
											
										
										
											2020-10-07 08:51:49 +00:00
+								    def _init_callback(self) -> None:
-												Add StopTrainingOnMaxEpisodes to callback collection (#147)

* Add StopTrainingOnMaxEpisodes class to pre-made callback collection

* Adjust instant when counters are incremented for both OnPolicy and OffPolicy algorithms

* Improv to StopTrainingOnMaxEpisodes including output, tests and doc

* Improv StopTrainingOnMaxEpisodes callback running _init_callback

* Update callbacks.py

* Update test_callbacks.py

* Fix style

* Update changelog.rst

* Fix test

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
											
										
										
											2020-08-28 09:36:33 +00:00
+								        # At start set total max according to number of envirnments
 								        self._total_max_episodes = self.max_episodes * self.training_env.num_envs
 								    def _on_step(self) -> bool:
 								        # Checking for both 'done' and 'dones' keywords because:
 								        # Some models use keyword 'done' (e.g.,: SAC, TD3, DQN, DDPG)
 								        # While some models use keyword 'dones' (e.g.,: A2C, PPO)
 								        done_array = np.array(self.locals.get("done") if self.locals.get("done") is not None else self.locals.get("dones"))
 								        self.n_episodes += np.sum(done_array).item()
 								        continue_training = self.n_episodes < self._total_max_episodes
 								        if self.verbose > 0 and not continue_training:
 								            mean_episodes_per_env = self.n_episodes / self.training_env.num_envs
 								            mean_ep_str = (
-												Fix f-string in max episodes callback (#152)


											
										
										
											2020-08-29 18:04:19 +00:00
+								                f"with an average of {mean_episodes_per_env:.2f} episodes per env" if self.training_env.num_envs > 1 else ""
-												Add StopTrainingOnMaxEpisodes to callback collection (#147)

* Add StopTrainingOnMaxEpisodes class to pre-made callback collection

* Adjust instant when counters are incremented for both OnPolicy and OffPolicy algorithms

* Improv to StopTrainingOnMaxEpisodes including output, tests and doc

* Improv StopTrainingOnMaxEpisodes callback running _init_callback

* Update callbacks.py

* Update test_callbacks.py

* Fix style

* Update changelog.rst

* Fix test

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
											
										
										
											2020-08-28 09:36:33 +00:00
+								            )
 								            print(
 								                f"Stopping training with a total of {self.num_timesteps} steps because the "
 								                f"{self.locals.get('tb_log_name')} model reached max_episodes={self.max_episodes}, "
 								                f"by playing for {self.n_episodes} episodes "
 								                f"{mean_ep_str}"
 								            )
 								        return continue_training