From 233f346d53d5bf4ef432a3ab3d193f356fb7d7b9 Mon Sep 17 00:00:00 2001
From: Antonin Raffin <antonin.raffin@dlr.de>
Date: Fri, 6 Dec 2019 17:46:56 +0100
Subject: [PATCH] Update todos

---
 README.md                                | 3 ++-
 torchy_baselines/common/distributions.py | 9 ---------
 torchy_baselines/ppo/ppo.py              | 4 ++--
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index bf14287..388570a 100644
--- a/README.md
+++ b/README.md
@@ -18,11 +18,11 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines
 ## Roadmap
 
 TODO:
-- save/load
 - better predict
 - complete logger
 - Refactor: buffer with numpy array instead of pytorch
 - Refactor: remove duplicated code for evaluation
+- double-check the shape of log prob
 
 - plotting? -> zoo
 
@@ -30,6 +30,7 @@ Later:
 - get_parameters / set_parameters
 - SDE: use [affine transform](https://www.tensorflow.org/probability/api_docs/python/tfp/bijectors/Affine)
   to scale the noise after a tanh transform?
+- Use MultivariateNormal with full covariance matrix?
 - CNN policies + normalization
 - tensorboard support
 - DQN
diff --git a/torchy_baselines/common/distributions.py b/torchy_baselines/common/distributions.py
index 05a5588..97eaf6f 100644
--- a/torchy_baselines/common/distributions.py
+++ b/torchy_baselines/common/distributions.py
@@ -17,15 +17,6 @@ class Distribution(object):
         """
         raise NotImplementedError
 
-    # def kl_div(self, other):
-    #     """
-    #     Calculates the Kullback-Leibler divergence from the given probabilty distribution
-    #
-    #     :param other: ([float]) the distribution to compare with
-    #     :return: (float) the KL divergence of the two distributions
-    #     """
-    #     raise NotImplementedError
-
     def entropy(self):
         """
         Returns shannon's entropy of the probability
diff --git a/torchy_baselines/ppo/ppo.py b/torchy_baselines/ppo/ppo.py
index 6fa2875..113e050 100644
--- a/torchy_baselines/ppo/ppo.py
+++ b/torchy_baselines/ppo/ppo.py
@@ -311,8 +311,8 @@ class PPO(BaseRLModel):
     def get_opt_parameters(self):
         """
         Returns a dict of all the optimizers and their parameters
-        
-        :return: (dict) of optimizer names and their state_dict 
+
+        :return: (dict) of optimizer names and their state_dict
         """
         return {"opt": self.policy.optimizer.state_dict()}