From 233f346d53d5bf4ef432a3ab3d193f356fb7d7b9 Mon Sep 17 00:00:00 2001 From: Antonin Raffin Date: Fri, 6 Dec 2019 17:46:56 +0100 Subject: [PATCH] Update todos --- README.md | 3 ++- torchy_baselines/common/distributions.py | 9 --------- torchy_baselines/ppo/ppo.py | 4 ++-- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index bf14287..388570a 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,11 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines ## Roadmap TODO: -- save/load - better predict - complete logger - Refactor: buffer with numpy array instead of pytorch - Refactor: remove duplicated code for evaluation +- double check the shape of log prob - plotting? -> zoo @@ -30,6 +30,7 @@ Later: - get_parameters / set_parameters - SDE: use [affine transform](https://www.tensorflow.org/probability/api_docs/python/tfp/bijectors/Affine) to scale the noise after a tanh transform? +- Use MultivariateNormal with full covariance matrix? - CNN policies + normalization - tensorboard support - DQN diff --git a/torchy_baselines/common/distributions.py b/torchy_baselines/common/distributions.py index 05a5588..97eaf6f 100644 --- a/torchy_baselines/common/distributions.py +++ b/torchy_baselines/common/distributions.py @@ -17,15 +17,6 @@ class Distribution(object): """ raise NotImplementedError - # def kl_div(self, other): - # """ - # Calculates the Kullback-Leibler divergence from the given probabilty distribution - # - # :param other: ([float]) the distribution to compare with - # :return: (float) the KL divergence of the two distributions - # """ - # raise NotImplementedError - def entropy(self): """ Returns shannon's entropy of the probability diff --git a/torchy_baselines/ppo/ppo.py b/torchy_baselines/ppo/ppo.py index 6fa2875..113e050 100644 --- a/torchy_baselines/ppo/ppo.py +++ b/torchy_baselines/ppo/ppo.py @@ -311,8 +311,8 @@ class PPO(BaseRLModel): def get_opt_parameters(self): """ Returns a dict of all the optimizers and their parameters - - :return: (dict) of optimizer names and their state_dict + + :return: (dict) of optimizer names and their state_dict """ return {"opt": self.policy.optimizer.state_dict()}