mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-05-14 20:58:03 +00:00
Adding FRASA to the projects page (#2059)
* Adding FRASA to the projects page * Updating changelog.rst * Ignore mypy errors for np arrays (python 3.11+) --------- Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
parent
9caa168686
commit
f432a6fcdc
8 changed files with 24 additions and 7 deletions
|
|
@ -39,6 +39,7 @@ Documentation:
|
|||
- Added Decisions and Dragons to resources. (@jmacglashan)
|
||||
- Updated PyBullet example, now compatible with Gymnasium
|
||||
- Added link to policies for ``policy_kwargs`` parameter (@kplers)
|
||||
- Added FRASA to the project page (@MarcDcls)
|
||||
|
||||
Release 2.4.0 (2024-11-18)
|
||||
--------------------------
|
||||
|
|
@ -1739,4 +1740,4 @@ And all the contributors:
|
|||
@DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
|
||||
@lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
|
||||
@marekm4 @stagoverflow @rushitnshah @markscsmith @NickLucche @cschindlbeck @peteole @jak3122 @will-maclean
|
||||
@brn-dev @jmacglashan @kplers
|
||||
@brn-dev @jmacglashan @kplers @MarcDcls
|
||||
|
|
|
|||
|
|
@ -250,3 +250,19 @@ It enables solving environments involving partial observability or locomotion (e
|
|||
| Authors: Corentin Léger, Gautier Hamon, Eleni Nisioti, Xavier Hinaut, Clément Moulin-Frier
|
||||
| Github: https://github.com/corentinlger/ER-MRL
|
||||
| Paper: https://arxiv.org/abs/2312.06695
|
||||
|
||||
|
||||
FRASA: Fall Recovery And Stand up agent
|
||||
---------------------------------------
|
||||
|
||||
A Deep Reinforcement Learning agent for a humanoid robot that learns to recover from falls and stand up.
|
||||
|
||||
The agent is trained using the MuJoCo physics engine. Real world experiments are conducted on the
|
||||
Sigmaban humanoid robot, a small-sized humanoid designed by the *Rhoban Team* to compete in the RoboCup Kidsize League.
|
||||
The results, detailed in the paper and the video, show that the agent is able to recover from
|
||||
various external disturbances and stand up in a few seconds.
|
||||
|
||||
Authors: Marc Duclusaud, Clément Gaspard, Grégoire Passault, Mélodie Daniel, Olivier Ly
|
||||
Github: https://github.com/Rhoban/frasa
|
||||
Paper: https://arxiv.org/abs/2410.08655
|
||||
Video: https://www.youtube.com/watch?v=NL65XW0O0mk
|
||||
|
|
@ -490,7 +490,7 @@ class EvalCallback(EventCallback):
|
|||
timesteps=self.evaluations_timesteps,
|
||||
results=self.evaluations_results,
|
||||
ep_lengths=self.evaluations_length,
|
||||
**kwargs,
|
||||
**kwargs, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ class BitFlippingEnv(Env):
|
|||
# Convert to binary representation
|
||||
bit_vector = ((bit_vector[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
|
||||
elif self.image_obs_space:
|
||||
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
|
||||
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255 # type: ignore[assignment]
|
||||
else:
|
||||
bit_vector = np.array(state).reshape(batch_size, -1)
|
||||
return bit_vector
|
||||
|
|
|
|||
|
|
@ -487,7 +487,7 @@ class OffPolicyAlgorithm(BaseAlgorithm):
|
|||
next_obs[i] = infos[i]["terminal_observation"]
|
||||
# VecNormalize normalizes the terminal observation
|
||||
if self._vec_normalize_env is not None:
|
||||
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :])
|
||||
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :]) # type: ignore[assignment]
|
||||
|
||||
replay_buffer.add(
|
||||
self._last_original_obs, # type: ignore[arg-type]
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ def tile_images(images_nhwc: Sequence[np.ndarray]) -> np.ndarray: # pragma: no
|
|||
# img_HhWwc
|
||||
out_image = out_image.transpose(0, 2, 1, 3, 4)
|
||||
# img_Hh_Ww_c
|
||||
out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
|
||||
out_image = out_image.reshape((new_height * height, new_width * width, n_channels)) # type: ignore[assignment]
|
||||
return out_image
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class DummyVecEnv(VecEnv):
|
|||
def step_wait(self) -> VecEnvStepReturn:
|
||||
# Avoid circular imports
|
||||
for env_idx in range(self.num_envs):
|
||||
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
|
||||
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step( # type: ignore[assignment]
|
||||
self.actions[env_idx]
|
||||
)
|
||||
# convert to SB3 VecEnv api
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ class HerReplayBuffer(DictReplayBuffer):
|
|||
self.ep_start[self.pos] = self._current_ep_start.copy()
|
||||
|
||||
if self.copy_info_dict:
|
||||
self.infos[self.pos] = infos
|
||||
self.infos[self.pos] = infos # type: ignore[assignment]
|
||||
# Store the transition
|
||||
super().add(obs, next_obs, action, reward, done, infos)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue