Fix learning starts

This commit is contained in:
Antonin RAFFIN 2019-10-01 21:56:37 +02:00
parent 440166fe26
commit 12f854e1aa
2 changed files with 3 additions and 3 deletions

View file

@@ -251,7 +251,7 @@ class SAC(BaseRLModel):
episode_num += n_episodes
timesteps_since_eval += episode_timesteps
-if self.num_timesteps > 0:
+if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
if self.verbose > 1:
print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
self.num_timesteps, episode_num, episode_timesteps, episode_reward))

View file

@@ -100,7 +100,7 @@ class TD3(BaseRLModel):
observation = np.array(observation)
with th.no_grad():
observation = th.FloatTensor(observation.reshape(1, -1)).to(self.device)
-return self.actor(observation).cpu().data.numpy()
+return self.actor(observation).cpu().numpy()
def predict(self, observation, state=None, mask=None, deterministic=True):
"""
@@ -222,7 +222,7 @@ class TD3(BaseRLModel):
self.num_timesteps += episode_timesteps
timesteps_since_eval += episode_timesteps
-if self.num_timesteps > 0:
+if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
if self.verbose > 1:
print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
self.num_timesteps, episode_num, episode_timesteps, episode_reward))