Bug fix when randomly sampling actions

This commit is contained in:
Antonin Raffin 2019-10-07 16:36:48 +02:00
parent 37ab9d10f1
commit 4d0c033bf2
2 changed files with 5 additions and 3 deletions

View file

@@ -19,7 +19,6 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines
TODO:
- save/load
- predict
- better rescale (min + action * range)
- flexible mlp
- logger
- better monitor wrapper?

View file

@@ -251,9 +251,12 @@ class BaseRLModel(object):
while not done:
# Select action randomly or according to policy
if num_timesteps < learning_starts:
action = [self.action_space.sample()]
action = np.array([self.action_space.sample()])
else:
action = self.scale_action(self.predict(obs, deterministic=deterministic))
action = self.predict(obs, deterministic=deterministic)
# Rescale the action from [low, high] to [-1, 1]
action = self.scale_action(action)
# Add noise to the action (improve exploration)
if action_noise is not None: