diff --git a/torchy_baselines/sac/sac.py b/torchy_baselines/sac/sac.py index 56c741c..9d77cc2 100644 --- a/torchy_baselines/sac/sac.py +++ b/torchy_baselines/sac/sac.py @@ -251,7 +251,7 @@ class SAC(BaseRLModel): episode_num += n_episodes timesteps_since_eval += episode_timesteps - if self.num_timesteps > 0: + if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts: if self.verbose > 1: print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( self.num_timesteps, episode_num, episode_timesteps, episode_reward)) diff --git a/torchy_baselines/td3/td3.py b/torchy_baselines/td3/td3.py index 8fc00ae..7f34970 100644 --- a/torchy_baselines/td3/td3.py +++ b/torchy_baselines/td3/td3.py @@ -100,7 +100,7 @@ class TD3(BaseRLModel): observation = np.array(observation) with th.no_grad(): observation = th.FloatTensor(observation.reshape(1, -1)).to(self.device) - return self.actor(observation).cpu().data.numpy() + return self.actor(observation).cpu().numpy() def predict(self, observation, state=None, mask=None, deterministic=True): """ @@ -222,7 +222,7 @@ class TD3(BaseRLModel): self.num_timesteps += episode_timesteps timesteps_since_eval += episode_timesteps - if self.num_timesteps > 0: + if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts: if self.verbose > 1: print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( self.num_timesteps, episode_num, episode_timesteps, episode_reward))