diff --git a/.coveragerc b/.coveragerc index a8fc2af..511f20d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,6 +3,8 @@ branch = False omit = tests/* setup.py + # Require graphical interface + torchy_baselines/common/results_plotter.py [report] exclude_lines = diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 0bb3910..b083e39 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -20,6 +20,7 @@ New Features: - Add methods for saving and loading replay buffer - Add `extend()` method to the buffers - Add `get_vec_normalize_env()` to `BaseRLModel` to retrieve `VecNormalize` wrapper when it exists +- Add `¶results_plotter` from Stable Baselines Bug Fixes: ^^^^^^^^^^ diff --git a/torchy_baselines/common/results_plotter.py b/torchy_baselines/common/results_plotter.py new file mode 100644 index 0000000..a5464da --- /dev/null +++ b/torchy_baselines/common/results_plotter.py @@ -0,0 +1,126 @@ +from typing import Tuple, Callable, List, Optional + +import numpy as np +import pandas as pd +import matplotlib +import matplotlib.pyplot as plt + +from torchy_baselines.common.monitor import load_results + +# matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode +plt.rcParams['svg.fonttype'] = 'none' + +X_TIMESTEPS = 'timesteps' +X_EPISODES = 'episodes' +X_WALLTIME = 'walltime_hrs' +POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME] +EPISODES_WINDOW = 100 +COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink', + 'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise', + 'darkgreen', 'tan', 'salmon', 'gold', 'lightpurple', 'darkred', 'darkblue'] + + +def rolling_window(array: np.ndarray, window: int) -> np.ndarray: + """ + Apply a rolling window to a np.ndarray + + :param array: (np.ndarray) the input Array + :param window: (int) length of the rolling window + :return: (np.ndarray) rolling window on the input array + """ + shape = array.shape[:-1] + (array.shape[-1] - window + 1, window) + strides = array.strides + (array.strides[-1],) + return np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides) + + +def window_func(var_1: np.ndarray, var_2: np.ndarray, + window: int, func: Callable) -> Tuple[np.ndarray, np.ndarray]: + """ + Apply a function to the rolling window of 2 arrays + + :param var_1: (np.ndarray) variable 1 + :param var_2: (np.ndarray) variable 2 + :param window: (int) length of the rolling window + :param func: (numpy function) function to apply on the rolling window on variable 2 (such as np.mean) + :return: (Tuple[np.ndarray, np.ndarray]) the rolling output with applied function + """ + var_2_window = rolling_window(var_2, window) + function_on_var2 = func(var_2_window, axis=-1) + return var_1[window - 1:], function_on_var2 + + +def ts2xy(timesteps: pd.DataFrame, x_axis: str) -> Tuple[np.ndarray, np.ndarray]: + """ + Decompose a timesteps variable to x ans ys + + :param timesteps: (pd.DataFrame) the input data + :param x_axis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :return: (Tuple[np.ndarray, np.ndarray]) the x and y output + """ + if x_axis == X_TIMESTEPS: + x_var = np.cumsum(timesteps.l.values) + y_var = timesteps.r.values + elif x_axis == X_EPISODES: + x_var = np.arange(len(timesteps)) + y_var = timesteps.r.values + elif x_axis == X_WALLTIME: + # Convert to hours + x_var = timesteps.t.values / 3600. + y_var = timesteps.r.values + else: + raise NotImplementedError + return x_var, y_var + + +def plot_curves(xy_list: List[Tuple[np.ndarray, np.ndarray]], + x_axis: str, title: str, figsize: Tuple[int, int] = (8, 2)) -> None: + """ + plot the curves + + :param xy_list: (List[Tuple[np.ndarray, np.ndarray]]) the x and y coordinates to plot + :param x_axis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param title: (str) the title of the plot + :param figsize: (Tuple[int, int]) Size of the figure (width, height) + """ + + plt.figure(figsize=figsize) + max_x = max(xy[0][-1] for xy in xy_list) + min_x = 0 + for (i, (x, y)) in enumerate(xy_list): + color = COLORS[i] + plt.scatter(x, y, s=2) + # Do not plot the smoothed curve at all if the timeseries is shorter than window size. + if x.shape[0] >= EPISODES_WINDOW: + # Compute and plot rolling mean with window of size EPISODE_WINDOW + x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean) + plt.plot(x, y_mean, color=color) + plt.xlim(min_x, max_x) + plt.title(title) + plt.xlabel(x_axis) + plt.ylabel("Episode Rewards") + plt.tight_layout() + + +def plot_results(dirs: List[str], num_timesteps: Optional[int], + x_axis: str, task_name: str, figsize: Tuple[int, int] = (8, 2)) -> None: + """ + plot the results + + :param dirs: ([str]) the save location of the results to plot + :param num_timesteps: (int or None) only plot the points below this value + :param x_axis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param task_name: (str) the title of the task to plot + :param figsize: (Tuple[int, int]) Size of the figure (width, height) + """ + + timesteps_list = [] + for folder in dirs: + timesteps = load_results(folder) + if num_timesteps is not None: + timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps] + timesteps_list.append(timesteps) + xy_list = [ts2xy(timesteps_item, x_axis) for timesteps_item in timesteps_list] + plot_curves(xy_list, x_axis, task_name, figsize)