Bug fix when randomly sampling actions

2026-07-11 17:48:55 +00:00 · 2019-10-07 16:36:48 +02:00 · 2019-10-07 16:36:48 +02:00 · 4d0c033bf2
commit 4d0c033bf2
parent 37ab9d10f1
2 changed files with 5 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -19,7 +19,6 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines
 TODO:
 - save/load
 - predict
- better rescale (min + action * range)
 - flexible mlp
 - logger
 - better monitor wrapper?
--- a/torchy_baselines/common/base_class.py
+++ b/torchy_baselines/common/base_class.py
@@ -251,9 +251,12 @@ class BaseRLModel(object):
            while not done:
                # Select action randomly or according to policy
                if num_timesteps < learning_starts:
-                    action = [self.action_space.sample()]
+                    action = np.array([self.action_space.sample()])
                else:
-                    action = self.scale_action(self.predict(obs, deterministic=deterministic))
+                    action = self.predict(obs, deterministic=deterministic)
+
+                # Rescale the action from [low, high] to [-1, 1]
+                action = self.scale_action(action)

                # Add noise to the action (improve exploration)
                if action_noise is not None: