mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-14 20:58:03 +00:00
Add evalcallback example (#1468)
* Moved 'Monitoring Training' to subsubsection of 'Using callbacks' * Added EvalCallback example * Updated Changelogs * Edited the language * Moved subsection headers up one level * added make_vec_env into Evalcallback example * Added parameters to the top for readability * Added note on multiple training environments * Added more clarity to eval_freq note * Apply suggestions from code review --------- Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
This commit is contained in:
parent
4f9805eeb8
commit
d6ddee9366
2 changed files with 46 additions and 2 deletions
|
|
@ -209,8 +209,8 @@ These dictionaries are randomly initialized on the creation of the environment a
|
|||
model.learn(total_timesteps=100_000)
|
||||
|
||||
|
||||
Using Callback: Monitoring Training
|
||||
-----------------------------------
|
||||
Callbacks: Monitoring Training
|
||||
------------------------------
|
||||
|
||||
.. note::
|
||||
|
||||
|
|
@ -308,6 +308,49 @@ If your callback returns False, training is aborted early.
|
|||
plt.show()
|
||||
|
||||
|
||||
Callbacks: Evaluate Agent Performance
|
||||
-------------------------------------
|
||||
To periodically evaluate an agent's performance on a separate test environment, use ``EvalCallback``.
|
||||
You can control the evaluation frequency with ``eval_freq`` to monitor your agent's progress during training.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import os
|
||||
import gymnasium as gym
|
||||
|
||||
from stable_baselines3 import SAC
|
||||
from stable_baselines3.common.callbacks import EvalCallback
|
||||
from stable_baselines3.common.env_util import make_vec_env
|
||||
|
||||
env_id = "Pendulum-v1"
|
||||
n_training_envs = 1
|
||||
n_eval_envs = 5
|
||||
|
||||
# Create log dir where evaluation results will be saved
|
||||
eval_log_dir = "./eval_logs/"
|
||||
os.makedirs(eval_log_dir, exist_ok=True)
|
||||
|
||||
# Initialize a vectorized training environment with default parameters
|
||||
train_env = make_vec_env(env_id, n_envs=n_training_envs, seed=0)
|
||||
|
||||
# Separate evaluation env, with different parameters passed via env_kwargs
|
||||
# Eval environments can be vectorized to speed up evaluation.
|
||||
eval_env = make_vec_env(env_id, n_envs=n_eval_envs, seed=0,
|
||||
env_kwargs={'g':0.7})
|
||||
|
||||
# Create callback that evaluates agent for 5 episodes every 500 training environment steps.
|
||||
# When using multiple training environments, agent will be evaluated every
|
||||
# eval_freq calls to train_env.step(), thus it will be evaluated every
|
||||
# (eval_freq * n_envs) training steps. See EvalCallback doc for more information.
|
||||
eval_callback = EvalCallback(eval_env, best_model_save_path=eval_log_dir,
|
||||
log_path=eval_log_dir, eval_freq=max(500 // n_training_envs, 1),
|
||||
n_eval_episodes=5, deterministic=True,
|
||||
render=False)
|
||||
|
||||
model = SAC("MlpPolicy", train_env)
|
||||
model.learn(5000, callback=eval_callback)
|
||||
|
||||
|
||||
Atari Games
|
||||
-----------
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ Documentation:
|
|||
- Upgraded tutorials to Gymnasium API
|
||||
- Make it more explicit when using ``VecEnv`` vs Gym env
|
||||
- Added UAV_Navigation_DRL_AirSim to the project page (@heleidsn)
|
||||
- Added ``EvalCallback`` example (@sidney-tio)
|
||||
|
||||
|
||||
Release 1.8.0 (2023-04-07)
|
||||
|
|
|
|||
Loading…
Reference in a new issue