mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-07-04 04:07:27 +00:00
Update todos
This commit is contained in:
parent
6c423add8d
commit
233f346d53
3 changed files with 4 additions and 12 deletions
|
|
@ -18,11 +18,11 @@ PyTorch version of [Stable Baselines](https://github.com/hill-a/stable-baselines
|
|||
## Roadmap
|
||||
|
||||
TODO:
|
||||
- save/load
|
||||
- better predict
|
||||
- complete logger
|
||||
- Refactor: buffer with numpy array instead of pytorch
|
||||
- Refactor: remove duplicated code for evaluation
|
||||
- double check the shape of log prob
|
||||
|
||||
- plotting? -> zoo
|
||||
|
||||
|
|
@ -30,6 +30,7 @@ Later:
|
|||
- get_parameters / set_parameters
|
||||
- SDE: use [affine transform](https://www.tensorflow.org/probability/api_docs/python/tfp/bijectors/Affine)
|
||||
to scale the noise after a tanh transform?
|
||||
- Use MultivariateNormal with full covariance matrix?
|
||||
- CNN policies + normalization
|
||||
- tensorboard support
|
||||
- DQN
|
||||
|
|
|
|||
|
|
@ -17,15 +17,6 @@ class Distribution(object):
|
|||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
# def kl_div(self, other):
|
||||
# """
|
||||
# Calculates the Kullback-Leibler divergence from the given probability distribution
|
||||
#
|
||||
# :param other: ([float]) the distribution to compare with
|
||||
# :return: (float) the KL divergence of the two distributions
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
|
||||
def entropy(self):
|
||||
"""
|
||||
Returns Shannon's entropy of the probability
|
||||
|
|
|
|||
|
|
@ -311,8 +311,8 @@ class PPO(BaseRLModel):
|
|||
def get_opt_parameters(self):
|
||||
"""
|
||||
Returns a dict of all the optimizers and their parameters
|
||||
|
||||
:return: (dict) of optimizer names and their state_dict
|
||||
|
||||
:return: (dict) of optimizer names and their state_dict
|
||||
"""
|
||||
return {"opt": self.policy.optimizer.state_dict()}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue