mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-26 22:45:15 +00:00
Bug fix when randomly sampling actions
This commit is contained in:
parent
37ab9d10f1
commit
4d0c033bf2
2 changed files with 5 additions and 3 deletions
|
|
@@ -19,7 +19,6 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines
|
|||
TODO:
|
||||
- save/load
|
||||
- predict
|
||||
- better rescale (min + action * range)
|
||||
- flexible mlp
|
||||
- logger
|
||||
- better monitor wrapper?
|
||||
|
|
|
|||
|
|
@@ -251,9 +251,12 @@ class BaseRLModel(object):
|
|||
while not done:
|
||||
# Select action randomly or according to policy
|
||||
if num_timesteps < learning_starts:
|
||||
action = [self.action_space.sample()]
|
||||
action = np.array([self.action_space.sample()])
|
||||
else:
|
||||
action = self.scale_action(self.predict(obs, deterministic=deterministic))
|
||||
action = self.predict(obs, deterministic=deterministic)
|
||||
|
||||
# Rescale the action from [low, high] to [-1, 1]
|
||||
action = self.scale_action(action)
|
||||
|
||||
# Add noise to the action (improve exploration)
|
||||
if action_noise is not None:
|
||||
|
|
|
|||
Loading…
Reference in a new issue