diff --git a/README.md b/README.md index bf14287..388570a 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,11 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines ## Roadmap TODO: -- save/load - better predict - complete logger - Refactor: buffer with numpy array instead of pytorch - Refactor: remove duplicated code for evaluation +- double check the shape of log prob - plotting? -> zoo @@ -30,6 +30,7 @@ Later: - get_parameters / set_parameters - SDE: use [affine transform](https://www.tensorflow.org/probability/api_docs/python/tfp/bijectors/Affine) to scale the noise after a tanh transform? +- Use MultivariateNormal with full covariance matrix? - CNN policies + normalization - tensorboard support - DQN diff --git a/torchy_baselines/common/distributions.py b/torchy_baselines/common/distributions.py index 05a5588..97eaf6f 100644 --- a/torchy_baselines/common/distributions.py +++ b/torchy_baselines/common/distributions.py @@ -17,15 +17,6 @@ class Distribution(object): """ raise NotImplementedError - # def kl_div(self, other): - # """ - # Calculates the Kullback-Leibler divergence from the given probabilty distribution - # - # :param other: ([float]) the distribution to compare with - # :return: (float) the KL divergence of the two distributions - # """ - # raise NotImplementedError - def entropy(self): """ Returns shannon's entropy of the probability diff --git a/torchy_baselines/ppo/ppo.py b/torchy_baselines/ppo/ppo.py index 6fa2875..113e050 100644 --- a/torchy_baselines/ppo/ppo.py +++ b/torchy_baselines/ppo/ppo.py @@ -311,8 +311,8 @@ class PPO(BaseRLModel): def get_opt_parameters(self): """ Returns a dict of all the optimizers and their parameters - - :return: (dict) of optimizer names and their state_dict + + :return: (dict) of optimizer names and their state_dict """ return {"opt": self.policy.optimizer.state_dict()}