Adding FRASA to the projects page (#2059)

* Adding FRASA to the projects page

* Updating changelog.rst

* Ignore mypy errors for np arrays (python 3.11+)

---------

Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
Marc Duclusaud 2024-12-17 14:53:07 +01:00 committed by GitHub
parent 9caa168686
commit f432a6fcdc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 24 additions and 7 deletions

View file

@ -39,6 +39,7 @@ Documentation:
- Added Decisions and Dragons to resources. (@jmacglashan)
- Updated PyBullet example, now compatible with Gymnasium
- Added link to policies for ``policy_kwargs`` parameter (@kplers)
- Added FRASA to the projects page (@MarcDcls)
Release 2.4.0 (2024-11-18)
--------------------------
@ -1739,4 +1740,4 @@ And all the contributors:
@DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
@lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
@marekm4 @stagoverflow @rushitnshah @markscsmith @NickLucche @cschindlbeck @peteole @jak3122 @will-maclean
@brn-dev @jmacglashan @kplers
@brn-dev @jmacglashan @kplers @MarcDcls

View file

@ -250,3 +250,19 @@ It enables solving environments involving partial observability or locomotion (e
| Authors: Corentin Léger, Gautier Hamon, Eleni Nisioti, Xavier Hinaut, Clément Moulin-Frier
| Github: https://github.com/corentinlger/ER-MRL
| Paper: https://arxiv.org/abs/2312.06695
FRASA: Fall Recovery And Stand up agent
---------------------------------------
A Deep Reinforcement Learning agent for a humanoid robot that learns to recover from falls and stand up.
The agent is trained using the MuJoCo physics engine. Real world experiments are conducted on the
Sigmaban humanoid robot, a small-sized humanoid designed by the *Rhoban Team* to compete in the RoboCup Kidsize League.
The results, detailed in the paper and the video, show that the agent is able to recover from
various external disturbances and stand up in a few seconds.
Authors: Marc Duclusaud, Clément Gaspard, Grégoire Passault, Mélodie Daniel, Olivier Ly
Github: https://github.com/Rhoban/frasa
Paper: https://arxiv.org/abs/2410.08655
Video: https://www.youtube.com/watch?v=NL65XW0O0mk

View file

@ -490,7 +490,7 @@ class EvalCallback(EventCallback):
timesteps=self.evaluations_timesteps,
results=self.evaluations_results,
ep_lengths=self.evaluations_length,
**kwargs,
**kwargs, # type: ignore[arg-type]
)
mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)

View file

@ -103,7 +103,7 @@ class BitFlippingEnv(Env):
# Convert to binary representation
bit_vector = ((bit_vector[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
elif self.image_obs_space:
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255 # type: ignore[assignment]
else:
bit_vector = np.array(state).reshape(batch_size, -1)
return bit_vector

View file

@ -487,7 +487,7 @@ class OffPolicyAlgorithm(BaseAlgorithm):
next_obs[i] = infos[i]["terminal_observation"]
# VecNormalize normalizes the terminal observation
if self._vec_normalize_env is not None:
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :])
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :]) # type: ignore[assignment]
replay_buffer.add(
self._last_original_obs, # type: ignore[arg-type]

View file

@ -43,7 +43,7 @@ def tile_images(images_nhwc: Sequence[np.ndarray]) -> np.ndarray: # pragma: no
# img_HhWwc
out_image = out_image.transpose(0, 2, 1, 3, 4)
# img_Hh_Ww_c
out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
out_image = out_image.reshape((new_height * height, new_width * width, n_channels)) # type: ignore[assignment]
return out_image

View file

@ -56,7 +56,7 @@ class DummyVecEnv(VecEnv):
def step_wait(self) -> VecEnvStepReturn:
# Avoid circular imports
for env_idx in range(self.num_envs):
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step( # type: ignore[assignment]
self.actions[env_idx]
)
# convert to SB3 VecEnv api

View file

@ -157,7 +157,7 @@ class HerReplayBuffer(DictReplayBuffer):
self.ep_start[self.pos] = self._current_ep_start.copy()
if self.copy_info_dict:
self.infos[self.pos] = infos
self.infos[self.pos] = infos # type: ignore[assignment]
# Store the transition
super().add(obs, next_obs, action, reward, done, infos)