mirror of
https://github.com/saymrwulf/stable-baselines3.git
synced 2026-06-29 03:31:08 +00:00
Add results plotter
This commit is contained in:
parent
7bafdb3a67
commit
9caea35a11
3 changed files with 129 additions and 0 deletions
|
|
@ -3,6 +3,8 @@ branch = False
|
|||
omit =
|
||||
tests/*
|
||||
setup.py
|
||||
# Require graphical interface
|
||||
torchy_baselines/common/results_plotter.py
|
||||
|
||||
[report]
|
||||
exclude_lines =
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ New Features:
|
|||
- Add methods for saving and loading replay buffer
|
||||
- Add `extend()` method to the buffers
|
||||
- Add `get_vec_normalize_env()` to `BaseRLModel` to retrieve `VecNormalize` wrapper when it exists
|
||||
- Add `¶results_plotter` from Stable Baselines
|
||||
|
||||
Bug Fixes:
|
||||
^^^^^^^^^^
|
||||
|
|
|
|||
126
torchy_baselines/common/results_plotter.py
Normal file
126
torchy_baselines/common/results_plotter.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
from typing import Tuple, Callable, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from torchy_baselines.common.monitor import load_results
|
||||
|
||||
# matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode
|
||||
plt.rcParams['svg.fonttype'] = 'none'
|
||||
|
||||
X_TIMESTEPS = 'timesteps'
|
||||
X_EPISODES = 'episodes'
|
||||
X_WALLTIME = 'walltime_hrs'
|
||||
POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME]
|
||||
EPISODES_WINDOW = 100
|
||||
COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink',
|
||||
'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise',
|
||||
'darkgreen', 'tan', 'salmon', 'gold', 'lightpurple', 'darkred', 'darkblue']
|
||||
|
||||
|
||||
def rolling_window(array: np.ndarray, window: int) -> np.ndarray:
|
||||
"""
|
||||
Apply a rolling window to a np.ndarray
|
||||
|
||||
:param array: (np.ndarray) the input Array
|
||||
:param window: (int) length of the rolling window
|
||||
:return: (np.ndarray) rolling window on the input array
|
||||
"""
|
||||
shape = array.shape[:-1] + (array.shape[-1] - window + 1, window)
|
||||
strides = array.strides + (array.strides[-1],)
|
||||
return np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides)
|
||||
|
||||
|
||||
def window_func(var_1: np.ndarray, var_2: np.ndarray,
|
||||
window: int, func: Callable) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Apply a function to the rolling window of 2 arrays
|
||||
|
||||
:param var_1: (np.ndarray) variable 1
|
||||
:param var_2: (np.ndarray) variable 2
|
||||
:param window: (int) length of the rolling window
|
||||
:param func: (numpy function) function to apply on the rolling window on variable 2 (such as np.mean)
|
||||
:return: (Tuple[np.ndarray, np.ndarray]) the rolling output with applied function
|
||||
"""
|
||||
var_2_window = rolling_window(var_2, window)
|
||||
function_on_var2 = func(var_2_window, axis=-1)
|
||||
return var_1[window - 1:], function_on_var2
|
||||
|
||||
|
||||
def ts2xy(timesteps: pd.DataFrame, x_axis: str) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Decompose a timesteps variable to x ans ys
|
||||
|
||||
:param timesteps: (pd.DataFrame) the input data
|
||||
:param x_axis: (str) the axis for the x and y output
|
||||
(can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
|
||||
:return: (Tuple[np.ndarray, np.ndarray]) the x and y output
|
||||
"""
|
||||
if x_axis == X_TIMESTEPS:
|
||||
x_var = np.cumsum(timesteps.l.values)
|
||||
y_var = timesteps.r.values
|
||||
elif x_axis == X_EPISODES:
|
||||
x_var = np.arange(len(timesteps))
|
||||
y_var = timesteps.r.values
|
||||
elif x_axis == X_WALLTIME:
|
||||
# Convert to hours
|
||||
x_var = timesteps.t.values / 3600.
|
||||
y_var = timesteps.r.values
|
||||
else:
|
||||
raise NotImplementedError
|
||||
return x_var, y_var
|
||||
|
||||
|
||||
def plot_curves(xy_list: List[Tuple[np.ndarray, np.ndarray]],
|
||||
x_axis: str, title: str, figsize: Tuple[int, int] = (8, 2)) -> None:
|
||||
"""
|
||||
plot the curves
|
||||
|
||||
:param xy_list: (List[Tuple[np.ndarray, np.ndarray]]) the x and y coordinates to plot
|
||||
:param x_axis: (str) the axis for the x and y output
|
||||
(can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
|
||||
:param title: (str) the title of the plot
|
||||
:param figsize: (Tuple[int, int]) Size of the figure (width, height)
|
||||
"""
|
||||
|
||||
plt.figure(figsize=figsize)
|
||||
max_x = max(xy[0][-1] for xy in xy_list)
|
||||
min_x = 0
|
||||
for (i, (x, y)) in enumerate(xy_list):
|
||||
color = COLORS[i]
|
||||
plt.scatter(x, y, s=2)
|
||||
# Do not plot the smoothed curve at all if the timeseries is shorter than window size.
|
||||
if x.shape[0] >= EPISODES_WINDOW:
|
||||
# Compute and plot rolling mean with window of size EPISODE_WINDOW
|
||||
x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean)
|
||||
plt.plot(x, y_mean, color=color)
|
||||
plt.xlim(min_x, max_x)
|
||||
plt.title(title)
|
||||
plt.xlabel(x_axis)
|
||||
plt.ylabel("Episode Rewards")
|
||||
plt.tight_layout()
|
||||
|
||||
|
||||
def plot_results(dirs: List[str], num_timesteps: Optional[int],
|
||||
x_axis: str, task_name: str, figsize: Tuple[int, int] = (8, 2)) -> None:
|
||||
"""
|
||||
plot the results
|
||||
|
||||
:param dirs: ([str]) the save location of the results to plot
|
||||
:param num_timesteps: (int or None) only plot the points below this value
|
||||
:param x_axis: (str) the axis for the x and y output
|
||||
(can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
|
||||
:param task_name: (str) the title of the task to plot
|
||||
:param figsize: (Tuple[int, int]) Size of the figure (width, height)
|
||||
"""
|
||||
|
||||
timesteps_list = []
|
||||
for folder in dirs:
|
||||
timesteps = load_results(folder)
|
||||
if num_timesteps is not None:
|
||||
timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
|
||||
timesteps_list.append(timesteps)
|
||||
xy_list = [ts2xy(timesteps_item, x_axis) for timesteps_item in timesteps_list]
|
||||
plot_curves(xy_list, x_axis, task_name, figsize)
|
||||
Loading…
Reference in a new issue