# -*- coding: utf-8 -*-
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from __future__ import absolute_import, division, print_function

import logging
from copy import deepcopy
from functools import reduce

import numpy as np
import pandas as pd

logger = logging.getLogger('fbprophet')


def generate_cutoffs(df, horizon, initial, period):
    """Generate cutoff dates

    Parameters
    ----------
    df: pd.DataFrame with historical data.
    horizon: pd.Timedelta forecast horizon.
    initial: pd.Timedelta window of the initial forecast period.
    period: pd.Timedelta simulated forecasts are done with this period.

    Returns
    -------
    list of pd.Timestamp
    """
    # Last cutoff is 'latest date in data - horizon' date
    cutoff = df['ds'].max() - horizon
    if cutoff < df['ds'].min():
        raise ValueError('Less data than horizon.')
    result = [cutoff]
    while result[-1] >= min(df['ds']) + initial:
        cutoff -= period
        # If data does not exist in data range (cutoff, cutoff + horizon]
        if not (((df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)).any()):
            # Next cutoff point is 'last date before cutoff in data - horizon'
            if cutoff > df['ds'].min():
                closest_date = df[df['ds'] <= cutoff].max()['ds']
                cutoff = closest_date - horizon
            # else no data left, leave cutoff as is, it will be dropped.
        result.append(cutoff)
    result = result[:-1]
    if len(result) == 0:
        raise ValueError(
            'Less data than horizon after initial window. '
            'Make horizon or initial shorter.'
        )
    logger.info('Making {} forecasts with cutoffs between {} and {}'.format(
        len(result), result[-1], result[0]
    ))
    return reversed(result)
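# A minimal usage sketch (hypothetical values, not part of the library API):
# for daily data, the last cutoff is `horizon` before the end of the history,
# and earlier cutoffs are spaced one `period` apart for as long as at least
# `initial` of training data remains before each cutoff.
#
#     cutoffs = list(generate_cutoffs(
#         df=m.history,
#         horizon=pd.Timedelta('30 days'),
#         initial=pd.Timedelta('90 days'),
#         period=pd.Timedelta('30 days'),
#     ))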


def cross_validation(model, horizon, period=None, initial=None):
    """Cross-Validation for time series.

    Computes forecasts from historical cutoff points. Beginning from
    (end - horizon), works backwards making cutoffs with a spacing of period
    until initial is reached.

    When period is equal to the time interval of the data, this is the
    technique described in https://robjhyndman.com/hyndsight/tscv/ .

    Parameters
    ----------
    model: Prophet class object. Fitted Prophet model.
    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
        '3 hours', '10 seconds'.
    period: string with pd.Timedelta compatible style. Simulated forecasts are
        made once per period. If not provided, 0.5 * horizon is used.
    initial: string with pd.Timedelta compatible style. The first training
        period will begin here. If not provided, 3 * horizon is used.

    Returns
    -------
    A pd.DataFrame with the forecast, actual value, and cutoff.
    """
    df = model.history.copy().reset_index(drop=True)
    horizon = pd.Timedelta(horizon)
    # Set period
    period = 0.5 * horizon if period is None else pd.Timedelta(period)
    # Identify largest seasonality period
    period_max = 0.
    for s in model.seasonalities.values():
        period_max = max(period_max, s['period'])
    seasonality_dt = pd.Timedelta(str(period_max) + ' days')
    # Set initial
    if initial is None:
        initial = max(3 * horizon, seasonality_dt)
    else:
        initial = pd.Timedelta(initial)
        if initial < seasonality_dt:
            msg = 'Seasonality has period of {} days '.format(period_max)
            msg += 'which is larger than initial window. '
            msg += 'Consider increasing initial.'
            logger.warning(msg)

    cutoffs = generate_cutoffs(df, horizon, initial, period)
    predicts = []
    for cutoff in cutoffs:
        # Generate a new object with the same fitting options
        m = prophet_copy(model, cutoff)
        # Train model
        history_c = df[df['ds'] <= cutoff]
        if history_c.shape[0] < 2:
            raise Exception(
                'Less than two datapoints before cutoff. '
                'Increase initial window.'
            )
        m.fit(history_c)
        # Calculate yhat
        index_predicted = (df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)
        # Get the columns for the future dataframe
        columns = ['ds']
        if m.growth == 'logistic':
            columns.append('cap')
            if m.logistic_floor:
                columns.append('floor')
        columns.extend(m.extra_regressors.keys())
        columns.extend([
            props['condition_name']
            for props in m.seasonalities.values()
            if props['condition_name'] is not None])
        yhat = m.predict(df[index_predicted][columns])
        # Merge yhat (predictions), y (original data), and cutoff
        predicts.append(pd.concat([
            yhat[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
            df[index_predicted][['y']].reset_index(drop=True),
            pd.DataFrame({'cutoff': [cutoff] * len(yhat)})
        ], axis=1))

    # Combine all predicted pd.DataFrame into one pd.DataFrame
    return reduce(lambda x, y: x.append(y), predicts).reset_index(drop=True)
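# A minimal usage sketch, assuming `m` is a Prophet model already fitted on a
# dataframe with columns 'ds' and 'y' (hypothetical values):
#
#     df_cv = cross_validation(m, horizon='30 days', period='15 days',
#                              initial='90 days')
#
# df_cv has one row per simulated forecast point, with columns
# ['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'y', 'cutoff'].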


def prophet_copy(m, cutoff=None):
    """Copy Prophet object

    Parameters
    ----------
    m: Prophet model.
    cutoff: pd.Timestamp or None, default None.
        Cutoff timestamp for the changepoints member variable.
        Changepoints are only retained if 'changepoints <= cutoff'.

    Returns
    -------
    Prophet class object with the same parameters as the model variable.
    """
    if m.history is None:
        raise Exception('This is for copying a fitted Prophet object.')

    if m.specified_changepoints:
        changepoints = m.changepoints
        if cutoff is not None:
            # Filter change points '<= cutoff'
            changepoints = changepoints[changepoints <= cutoff]
    else:
        changepoints = None

    # Auto seasonalities are set to False because they are already set in
    # m.seasonalities.
    m2 = m.__class__(
        growth=m.growth,
        n_changepoints=m.n_changepoints,
        changepoint_range=m.changepoint_range,
        changepoints=changepoints,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        holidays=m.holidays,
        seasonality_mode=m.seasonality_mode,
        seasonality_prior_scale=m.seasonality_prior_scale,
        changepoint_prior_scale=m.changepoint_prior_scale,
        holidays_prior_scale=m.holidays_prior_scale,
        mcmc_samples=m.mcmc_samples,
        interval_width=m.interval_width,
        uncertainty_samples=m.uncertainty_samples,
    )
    m2.extra_regressors = deepcopy(m.extra_regressors)
    m2.seasonalities = deepcopy(m.seasonalities)
    m2.country_holidays = deepcopy(m.country_holidays)
    return m2
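# A minimal usage sketch (hypothetical values): retrain an already-fitted
# model on a truncated history without re-specifying its configuration, which
# is how cross_validation uses this helper.
#
#     cutoff = pd.Timestamp('2017-01-01')
#     m2 = prophet_copy(m, cutoff)
#     m2.fit(m.history[m.history['ds'] <= cutoff])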


def performance_metrics(df, metrics=None, rolling_window=0.1):
    """Compute performance metrics from cross-validation results.

    Computes a suite of performance metrics on the output of cross-validation.
    By default the following metrics are included:
    'mse': mean squared error
    'rmse': root mean squared error
    'mae': mean absolute error
    'mape': mean absolute percent error
    'coverage': coverage of the upper and lower intervals

    A subset of these can be specified by passing a list of names as the
    `metrics` argument.

    Metrics are calculated over a rolling window of cross validation
    predictions, after sorting by horizon. Averaging is first done within each
    value of horizon, and then across horizons as needed to reach the window
    size. The size of that window (number of simulated forecast points) is
    determined by the rolling_window argument, which specifies a proportion of
    simulated forecast points to include in each window. rolling_window=0 will
    compute it separately for each horizon. The default of rolling_window=0.1
    will use 10% of the rows in df in each window. rolling_window=1 will
    compute the metric across all simulated forecast points. The results are
    set to the right edge of the window.

    If rolling_window < 0, then metrics are computed at each datapoint with no
    averaging (i.e., 'mse' will actually be squared error with no mean).

    The output is a dataframe containing column 'horizon' along with columns
    for each of the metrics computed.

    Parameters
    ----------
    df: The dataframe returned by cross_validation.
    metrics: A list of performance metrics to compute. If not provided, will
        use ['mse', 'rmse', 'mae', 'mape', 'coverage'].
    rolling_window: Proportion of data to use in each rolling window for
        computing the metrics. Should be in [0, 1] to average across horizons.

    Returns
    -------
    Dataframe with a column for each metric, and column 'horizon'.
    """
    valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'coverage']
    if metrics is None:
        metrics = valid_metrics
    if len(set(metrics)) != len(metrics):
        raise ValueError('Input metrics must be a list of unique values')
    if not set(metrics).issubset(set(valid_metrics)):
        raise ValueError(
            'Valid values for metrics are: {}'.format(valid_metrics)
        )
    df_m = df.copy()
    df_m['horizon'] = df_m['ds'] - df_m['cutoff']
    df_m.sort_values('horizon', inplace=True)
    if 'mape' in metrics and df_m['y'].abs().min() < 1e-8:
        logger.info('Skipping MAPE because y close to 0')
        metrics.remove('mape')
    if len(metrics) == 0:
        return None
    w = int(rolling_window * df_m.shape[0])
    if w >= 0:
        w = max(w, 1)
        w = min(w, df_m.shape[0])
    # Compute all metrics
    dfs = {}
    for metric in metrics:
        dfs[metric] = eval(metric)(df_m, w)
    res = dfs[metrics[0]]
    for i in range(1, len(metrics)):
        res_m = dfs[metrics[i]]
        assert np.array_equal(res['horizon'].values, res_m['horizon'].values)
        res[metrics[i]] = res_m[metrics[i]]
    return res
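# A minimal usage sketch, continuing the hypothetical cross_validation example
# above:
#
#     df_p = performance_metrics(df_cv, rolling_window=0.1)
#     df_p_all = performance_metrics(df_cv, metrics=['rmse', 'coverage'],
#                                    rolling_window=1)
#
# The first call returns rolling metrics over windows covering 10% of the
# simulated forecast points; the second returns a single row aggregated over
# all points, indexed by the largest horizon.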


def rolling_mean_by_h(x, h, w, name):
    """Compute a rolling mean of x, after first aggregating by h.

    Right-aligned. Computes a single mean for each unique value of h. Each
    mean is over at least w samples.

    Parameters
    ----------
    x: Array.
    h: Array of horizon for each value in x.
    w: Integer window size (number of elements).
    name: Name for metric in result dataframe.

    Returns
    -------
    Dataframe with columns horizon and name, the rolling mean of x.
    """
    # Aggregate over h
    df = pd.DataFrame({'x': x, 'h': h})
    df2 = (
        df.groupby('h').agg(['mean', 'count']).reset_index().sort_values('h')
    )
    xm = df2['x']['mean'].values
    ns = df2['x']['count'].values
    hs = df2['h'].values

    res_h = []
    res_x = []
    # Start from the right and work backwards
    i = len(hs) - 1
    while i >= 0:
        # Construct a mean of at least w samples.
        n = int(ns[i])
        xbar = float(xm[i])
        j = i - 1
        while ((n < w) and j >= 0):
            # Include points from the previous horizon. All of them if still
            # less than w, otherwise just enough to get to w.
            n2 = min(w - n, ns[j])
            xbar = xbar * (n / (n + n2)) + xm[j] * (n2 / (n + n2))
            n += n2
            j -= 1
        if n < w:
            # Ran out of horizons before enough points.
            break
        res_h.append(hs[i])
        res_x.append(xbar)
        i -= 1
    res_h.reverse()
    res_x.reverse()
    return pd.DataFrame({'horizon': res_h, name: res_x})
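# A minimal worked example (hypothetical values): with x = [1., 2., 3., 4.],
# h = [1, 1, 2, 3] and w = 2, the per-horizon means are 1.5 (n=2), 3.0 (n=1),
# and 4.0 (n=1). Working right to left, horizon 3 borrows one sample's weight
# from horizon 2 (0.5 * 4.0 + 0.5 * 3.0 = 3.5), horizon 2 borrows from
# horizon 1 (0.5 * 3.0 + 0.5 * 1.5 = 2.25), and horizon 1 already has two
# samples, so
#
#     rolling_mean_by_h(np.array([1., 2., 3., 4.]), np.array([1, 1, 2, 3]),
#                       w=2, name='mse')
#
# returns a dataframe with horizon [1, 2, 3] and mse [1.5, 2.25, 3.5].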


# The functions below specify performance metrics for cross-validation results.
# Each takes as input the output of cross_validation, and returns the statistic
# as a dataframe, given a window size for rolling aggregation.
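# A minimal sketch of that shared contract (hypothetical values; df_cv as
# returned by cross_validation, with the 'horizon' column added as in
# performance_metrics):
#
#     df_m = df_cv.copy()
#     df_m['horizon'] = df_m['ds'] - df_m['cutoff']
#     df_m = df_m.sort_values('horizon')
#     mse_by_h = mse(df_m, w=10)   # rolling MSE over windows of >= 10 points
#     point_se = mse(df_m, w=-1)   # w < 0: squared error at each point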


def mse(df, w):
    """Mean squared error

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and mse.
    """
    se = (df['y'] - df['yhat']) ** 2
    if w < 0:
        return pd.DataFrame({'horizon': df['horizon'], 'mse': se})
    return rolling_mean_by_h(
        x=se.values, h=df['horizon'].values, w=w, name='mse'
    )


def rmse(df, w):
    """Root mean squared error

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and rmse.
    """
    res = mse(df, w)
    res['mse'] = np.sqrt(res['mse'])
    res.rename({'mse': 'rmse'}, axis='columns', inplace=True)
    return res


def mae(df, w):
    """Mean absolute error

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and mae.
    """
    ae = np.abs(df['y'] - df['yhat'])
    if w < 0:
        return pd.DataFrame({'horizon': df['horizon'], 'mae': ae})
    return rolling_mean_by_h(
        x=ae.values, h=df['horizon'].values, w=w, name='mae'
    )


def mape(df, w):
    """Mean absolute percent error

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and mape.
    """
    ape = np.abs((df['y'] - df['yhat']) / df['y'])
    if w < 0:
        return pd.DataFrame({'horizon': df['horizon'], 'mape': ape})
    return rolling_mean_by_h(
        x=ape.values, h=df['horizon'].values, w=w, name='mape'
    )


def smape(df, w):
    """Symmetric mean absolute percentage error

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and smape.
    """
    sape = np.abs(df['yhat'] - df['y']) / (
        (np.abs(df['y']) + np.abs(df['yhat'])) / 2
    )
    if w < 0:
        return pd.DataFrame({'horizon': df['horizon'], 'smape': sape})
    return rolling_mean_by_h(
        x=sape.values, h=df['horizon'].values, w=w, name='smape'
    )


def coverage(df, w):
    """Coverage

    Parameters
    ----------
    df: Cross-validation results dataframe.
    w: Aggregation window size.

    Returns
    -------
    Dataframe with columns horizon and coverage.
    """
    is_covered = (df['y'] >= df['yhat_lower']) & (df['y'] <= df['yhat_upper'])
    if w < 0:
        return pd.DataFrame({'horizon': df['horizon'], 'coverage': is_covered})
    return rolling_mean_by_h(
        x=is_covered.values, h=df['horizon'].values, w=w, name='coverage'
    )