stable-baselines3/tests/test_run.py

30 lines
1.3 KiB
Python
Raw Normal View History

2019-10-07 14:26:03 +00:00
import numpy as np
2020-03-12 10:12:10 +00:00
import pytest
2019-10-07 14:26:03 +00:00
2020-05-05 13:02:35 +00:00
from stable_baselines3 import A2C, PPO, SAC, TD3
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
2019-10-07 14:26:03 +00:00
2020-04-23 13:18:21 +00:00
# Module-level NormalActionNoise instance, reused as one of the parametrized
# `action_noise` values for test_td3.
# NOTE(review): the two positional arguments are presumably (mean, sigma), each
# a length-1 array -- confirm against the NormalActionNoise signature.
normal_action_noise = NormalActionNoise(np.zeros(1), 0.1 * np.ones(1))
2019-09-18 13:35:17 +00:00
2019-09-21 15:17:09 +00:00
2020-04-23 13:18:21 +00:00
@pytest.mark.parametrize(
    'action_noise',
    [
        normal_action_noise,
        OrnsteinUhlenbeckActionNoise(np.zeros(1), 0.1 * np.ones(1)),
    ],
)
def test_td3(action_noise):
    """Smoke-test TD3 on 'Pendulum-v0' with each parametrized action noise.

    The test makes no assertions: it builds the model and calls ``learn``,
    so it passes as long as neither step raises.

    :param action_noise: noise object forwarded to the TD3 constructor
        (supplied by the ``parametrize`` decorator).
    """
    agent = TD3(
        'MlpPolicy',
        'Pendulum-v0',
        policy_kwargs={'net_arch': [64, 64]},
        learning_starts=100,
        verbose=1,
        create_eval_env=True,
        action_noise=action_noise,
    )
    # NOTE(review): eval_freq=500 presumably triggers evaluation on the eval
    # env requested via create_eval_env=True -- confirm against the library.
    agent.learn(total_timesteps=1000, eval_freq=500)
2019-09-18 20:12:32 +00:00
2019-09-21 15:17:09 +00:00
2019-10-25 08:59:15 +00:00
@pytest.mark.parametrize("model_class", [A2C, PPO])
@pytest.mark.parametrize("env_id", ['CartPole-v1', 'Pendulum-v0'])
def test_onpolicy(model_class, env_id):
    """Smoke-test A2C and PPO on each parametrized environment id.

    The test makes no assertions: it builds the model with a fixed seed and
    calls ``learn``, so it passes as long as neither step raises.

    :param model_class: algorithm class to instantiate (A2C or PPO).
    :param env_id: environment id string passed to the model constructor.
    """
    agent = model_class(
        'MlpPolicy',
        env_id,
        seed=0,
        policy_kwargs={'net_arch': [16]},
        verbose=1,
        create_eval_env=True,
    )
    agent.learn(total_timesteps=1000, eval_freq=500)
2019-09-24 12:15:12 +00:00
@pytest.mark.parametrize("ent_coef", ['auto', 0.01])
def test_sac(ent_coef):
    """Smoke-test SAC on 'Pendulum-v0' for each parametrized ``ent_coef``.

    The test makes no assertions: it builds the model and calls ``learn``,
    so it passes as long as neither step raises.

    :param ent_coef: value forwarded to SAC's ``ent_coef`` argument; the
        parametrization covers the string 'auto' and the float 0.01.
    """
    # Both constructor arguments are zero arrays.
    # NOTE(review): presumably this exercises the action-noise code path
    # without actually perturbing actions -- confirm intent.
    zero_noise = NormalActionNoise(np.zeros(1), np.zeros(1))
    agent = SAC(
        'MlpPolicy',
        'Pendulum-v0',
        policy_kwargs={'net_arch': [64, 64]},
        learning_starts=100,
        verbose=1,
        create_eval_env=True,
        ent_coef=ent_coef,
        action_noise=zero_noise,
    )
    agent.learn(total_timesteps=1000, eval_freq=500)