From d6ddee9366fe8fc2c8fc5997371301ff85aac36c Mon Sep 17 00:00:00 2001 From: Sidney Tio <35787241+sidney-tio@users.noreply.github.com> Date: Wed, 3 May 2023 00:02:36 +0800 Subject: [PATCH] Add evalcallback example (#1468) * Moved 'Monitoring Training' to subsubsection of 'Using callbacks' * Added EvalCallback example * Updated Changelogs * Edited the language * Moved subsection headers up one level * added make_vec_env into Evalcallback example * Added parameters to the top for readability * Added note on multiple training environments * Added more clarity to eval_freq note * Apply suggestions from code review --------- Co-authored-by: Antonin RAFFIN --- docs/guide/examples.rst | 47 +++++++++++++++++++++++++++++++++++++++-- docs/misc/changelog.rst | 1 + 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst index 5935f50..818f282 100644 --- a/docs/guide/examples.rst +++ b/docs/guide/examples.rst @@ -209,8 +209,8 @@ These dictionaries are randomly initialized on the creation of the environment a model.learn(total_timesteps=100_000) -Using Callback: Monitoring Training ------------------------------------ +Callbacks: Monitoring Training +------------------------------ .. note:: @@ -308,6 +308,49 @@ If your callback returns False, training is aborted early. plt.show() +Callbacks: Evaluate Agent Performance +------------------------------------- +To periodically evaluate an agent's performance on a separate test environment, use ``EvalCallback``. +You can control the evaluation frequency with ``eval_freq`` to monitor your agent's progress during training. + +.. 
code-block:: python

    import os
    import gymnasium as gym

    from stable_baselines3 import SAC
    from stable_baselines3.common.callbacks import EvalCallback
    from stable_baselines3.common.env_util import make_vec_env

    env_id = "Pendulum-v1"
    n_training_envs = 1
    n_eval_envs = 5

    # Create log dir where evaluation results will be saved
    eval_log_dir = "./eval_logs/"
    os.makedirs(eval_log_dir, exist_ok=True)

    # Initialize a vectorized training environment with default parameters
    train_env = make_vec_env(env_id, n_envs=n_training_envs, seed=0)

    # Separate evaluation env, with different parameters passed via env_kwargs
    # Eval environments can be vectorized to speed up evaluation.
    eval_env = make_vec_env(env_id, n_envs=n_eval_envs, seed=0,
                            env_kwargs={'g':0.7})

    # Create callback that evaluates agent for 5 episodes every 500 training environment steps.
    # When using multiple training environments, agent will be evaluated every
    # eval_freq calls to train_env.step(), thus it will be evaluated every
    # (eval_freq * n_envs) training steps. See EvalCallback doc for more information.
    eval_callback = EvalCallback(eval_env, best_model_save_path=eval_log_dir,
                                 log_path=eval_log_dir, eval_freq=max(500 // n_training_envs, 1),
                                 n_eval_episodes=5, deterministic=True,
                                 render=False)

    model = SAC("MlpPolicy", train_env)
    model.learn(5000, callback=eval_callback)


Atari Games
-----------

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 9492bd7..c08a923 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -61,6 +61,7 @@ Documentation: - Upgraded tutorials to Gymnasium API - Make it more explicit when using ``VecEnv`` vs Gym env - Added UAV_Navigation_DRL_AirSim to the project page (@heleidsn) +- Added ``EvalCallback`` example (@sidney-tio) Release 1.8.0 (2023-04-07)