mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-16 21:10:08 +00:00
* Removed unneeded overrides of feature_extractor and normalize_images in the TD3 Actor. * Add learning rate schedule example (#248) * Add learning rate schedule example * Update docs/guide/examples.rst Co-authored-by: Adam Gleave <adam@gleave.me> * Address comments Co-authored-by: Adam Gleave <adam@gleave.me> * Add supported action spaces checks (#254) * Add supported action spaces checks * Address comment * Use `pass` in an abstractmethod instead of deleting the arguments. * Remove the "deterministic" keyword from the forward method of the TD3 Actor since it always is deterministic anyways. * Rename _get_data to _get_data_to_reconstruct_model. _get_data was too generic and could have meant anything. * Remove the n_episodes_rollout parameter and allow passing tuples as train_freq instead. * Fix docstring of `train_freq` parameter. * Black fixes. * Fix TD3 delayed update + rename `_get_data()` * Fix TD3 test * Normalize `train_freq` to a tuple in the constructor and turn the warning into an assert. * Make one step the default train frequency. * Black fixes. * Change np.bool to bool. * Use the tuple format to specify an amount of steps in terms of steps or episodes in the collect_collouts of the off policy algorithm. * Use the tuple format to specify an amount of steps in terms of steps or episodes in the collect_collouts of HER. * Use named tuple for train freq * Rename train_freq to train_every and TrainFreq to ExperienceDuration. Also add some type annotations and documentation. * Black fixes. * Revert to train_freq * Fix terminal observation issues * Typo * Fix action noise bug in HER * Add assert when loading HER models * Update version Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org> Co-authored-by: Adam Gleave <adam@gleave.me>
65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
import numpy as np
|
|
from gym import spaces
|
|
|
|
from stable_baselines3.common.preprocessing import is_image_space
|
|
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvStepReturn, VecEnvWrapper
|
|
|
|
|
|
class VecTransposeImage(VecEnvWrapper):
    """
    Re-order channels, from HxWxC to CxHxW.
    It is required for PyTorch convolution layers.

    :param venv: The vectorized environment to wrap; its observation space
        must be accepted by ``is_image_space``.
    """

    def __init__(self, venv: VecEnv):
        assert is_image_space(venv.observation_space), "The observation space must be an image"

        # Advertise the channel-first space to consumers of the wrapper.
        observation_space = self.transpose_space(venv.observation_space)
        super().__init__(venv, observation_space=observation_space)

    @staticmethod
    def transpose_space(observation_space: spaces.Box) -> spaces.Box:
        """
        Transpose an observation space (re-order channels).

        :param observation_space: Image space laid out as HxWxC.
        :return: A new ``Box`` with shape CxHxW and the same dtype.
        """
        assert is_image_space(observation_space), "The observation space must be an image"
        # The input layout is HxWxC (see the class docstring), so the first
        # axis is the height, not the width.
        height, width, channels = observation_space.shape
        new_shape = (channels, height, width)
        # NOTE(review): bounds are hard-coded to [0, 255]; this presumably
        # matches what ``is_image_space`` enforces -- confirm against its definition.
        return spaces.Box(low=0, high=255, shape=new_shape, dtype=observation_space.dtype)

    @staticmethod
    def transpose_image(image: np.ndarray) -> np.ndarray:
        """
        Transpose an image or batch of images (re-order channels).

        :param image: A single image (3 dimensions, channel last) or a batch
            of images (4 dimensions, channel last).
        :return: The same data with the channel axis moved in front of the
            two spatial axes; the batch axis, if any, stays first.
        """
        if image.ndim == 3:
            return np.transpose(image, (2, 0, 1))
        return np.transpose(image, (0, 3, 1, 2))

    def step_wait(self) -> VecEnvStepReturn:
        """
        Fetch the step results from the wrapped env and transpose the images.

        :return: ``(observations, rewards, dones, infos)`` where the
            observations, and any terminal observation stored in ``infos``,
            have been converted to channel-first layout.
        """
        observations, rewards, dones, infos = self.venv.step_wait()

        # Transpose the terminal observations
        for idx, done in enumerate(dones):
            if not done:
                continue
            info = infos[idx]
            # The key may be absent if the wrapped env does not store the
            # last observation of an episode; skip instead of raising KeyError.
            if "terminal_observation" in info:
                info["terminal_observation"] = self.transpose_image(info["terminal_observation"])

        return self.transpose_image(observations), rewards, dones, infos

    def reset(self) -> np.ndarray:
        """
        Reset all environments

        :return: The observations returned by the wrapped env's ``reset``,
            transposed to channel-first layout.
        """
        return self.transpose_image(self.venv.reset())

    def close(self) -> None:
        """
        Close the wrapped vectorized environment.
        """
        self.venv.close()
|