stable-baselines3/stable_baselines3/common/vec_env/vec_transpose.py
M. Ernestus 0c50d75ecb
TD3 Code review (#245)
* Removed unneeded overrides of feature_extractor and normalize_images in the TD3 Actor.

* Add learning rate schedule example (#248)

* Add learning rate schedule example

* Update docs/guide/examples.rst

Co-authored-by: Adam Gleave <adam@gleave.me>

* Address comments

Co-authored-by: Adam Gleave <adam@gleave.me>

* Add supported action spaces checks (#254)

* Add supported action spaces checks

* Address comment

* Use `pass` in an abstractmethod instead of deleting the arguments.

* Remove the "deterministic" keyword from the forward method of the TD3 Actor since it always is deterministic anyways.

* Rename _get_data to _get_data_to_reconstruct_model.

_get_data was too generic and could have meant anything.

* Remove the n_episodes_rollout parameter and allow passing tuples as train_freq instead.

* Fix docstring of `train_freq` parameter.

* Black fixes.

* Fix TD3 delayed update + rename `_get_data()`

* Fix TD3 test

* Normalize `train_freq` to a tuple in the constructor and turn the warning into an assert.

* Make one step the default train frequency.

* Black fixes.

* Change np.bool to bool.

* Use the tuple format to specify an amount of steps in terms of steps or episodes in the collect_collouts of the off policy algorithm.

* Use the tuple format to specify an amount of steps in terms of steps or episodes in the collect_collouts of HER.

* Use named tuple for train freq

* Rename train_freq to train_every and TrainFreq to ExperienceDuration. Also add some type annotations and documentation.

* Black fixes.

* Revert to train_freq

* Fix terminal observation issues

* Typo

* Fix action noise bug in HER

* Add assert when loading HER models

* Update version

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Co-authored-by: Adam Gleave <adam@gleave.me>
2021-02-27 17:33:50 +01:00

65 lines
2.1 KiB
Python

import numpy as np
from gym import spaces
from stable_baselines3.common.preprocessing import is_image_space
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvStepReturn, VecEnvWrapper
class VecTransposeImage(VecEnvWrapper):
"""
Re-order channels, from HxWxC to CxHxW.
It is required for PyTorch convolution layers.
:param venv:
"""
def __init__(self, venv: VecEnv):
assert is_image_space(venv.observation_space), "The observation space must be an image"
observation_space = self.transpose_space(venv.observation_space)
super(VecTransposeImage, self).__init__(venv, observation_space=observation_space)
@staticmethod
def transpose_space(observation_space: spaces.Box) -> spaces.Box:
"""
Transpose an observation space (re-order channels).
:param observation_space:
:return:
"""
assert is_image_space(observation_space), "The observation space must be an image"
width, height, channels = observation_space.shape
new_shape = (channels, width, height)
return spaces.Box(low=0, high=255, shape=new_shape, dtype=observation_space.dtype)
@staticmethod
def transpose_image(image: np.ndarray) -> np.ndarray:
"""
Transpose an image or batch of images (re-order channels).
:param image:
:return:
"""
if len(image.shape) == 3:
return np.transpose(image, (2, 0, 1))
return np.transpose(image, (0, 3, 1, 2))
def step_wait(self) -> VecEnvStepReturn:
observations, rewards, dones, infos = self.venv.step_wait()
# Transpose the terminal observations
for idx, done in enumerate(dones):
if not done:
continue
infos[idx]["terminal_observation"] = self.transpose_image(infos[idx]["terminal_observation"])
return self.transpose_image(observations), rewards, dones, infos
def reset(self) -> np.ndarray:
"""
Reset all environments
"""
return self.transpose_image(self.venv.reset())
def close(self) -> None:
self.venv.close()