Mirror of https://github.com/saymrwulf/prophet.git, synced 2026-05-16 21:00:16 +00:00
Refactor diagnostic metrics to allow/include grouping by horizon (Py)
commit 994db64942
parent 3afdaaf4e1
2 changed files with 160 additions and 52 deletions
python/fbprophet/diagnostics.py
@@ -196,67 +196,114 @@ def prophet_copy(m, cutoff=None):
     return m2


-def me(df):
-    return((df['yhat'] - df['y']).sum()/len(df['yhat']))
-
-def mse(df):
-    return((df['yhat'] - df['y']).pow(2).sum()/len(df))
-
-def rmse(df):
-    return(np.sqrt((df['yhat'] - df['y']).pow(2).sum()/len(df)))
-
-def mae(df):
-    return((df['yhat'] - df['y']).abs().sum()/len(df))
-
-def mpe(df):
-    return((df['yhat'] - df['y']).div(df['y']).sum()*(1/len(df)))
-
-def mape(df):
-    return((df['yhat'] - df['y']).div(df['y']).abs().sum()*(1/len(df)))
-
-def all_metrics(model, df_cv = None):
-    """Compute model fit metrics for time series.
+def performance_metrics(df, metrics=None, aggregation='horizon'):
+    """Compute performance metrics from cross-validation results.

-    Computes the following metrics for each time series that has been through
-    cross-validation:
+    Computes a suite of performance metrics on the output of cross-validation.
+    By default the following metrics are included:
+    'mse': mean squared error
+    'mae': mean absolute error
+    'mape': mean absolute percent error
+    'coverage': coverage of the upper and lower intervals

-    Mean Error (ME)
-    Mean Squared Error (MSE)
-    Root Mean Square Error (RMSE)
-    Mean Absolute Error (MAE)
-    Mean Percentage Error (MPE)
-    Mean Absolute Percentage Error (MAPE)
+    A subset of these can be specified by passing a list of names as the
+    `metrics` argument.
+
+    By default, metrics will be computed for each horizon (ds - cutoff).
+    Alternatively, metrics can be computed at the level of individual ds/cutoff
+    pairs (aggregation='none'), or aggregated over all ds/cutoffs
+    (aggregation='all').
+
+    The output is a dataframe containing the columns corresponding to the level
+    of aggregation ('horizon', 'ds' and 'cutoff', or none) along with columns
+    for each of the metrics computed.

     Parameters
     ----------
-    df: A pandas dataframe. Contains y and yhat produced by cross-validation
+    df: The dataframe returned by cross_validation.
+    metrics: A list of performance metrics to compute. If not provided, will
+        use ['mse', 'mae', 'mape', 'coverage'].
+    aggregation: Level of aggregation for computing performance statistics.
+        Must be 'horizon', 'none', or 'all'.

     Returns
     -------
-    A dictionary where the key is the error type and the value is the value of
-    that error.
+    Dataframe with a column for each metric, and a combination of columns 'ds',
+    'cutoff', and 'horizon', depending on the aggregation level.
     """
-    df = []
-
-    if df_cv is not None:
-        df = df_cv
-    else:
-        # Run a forecast on the model's own history with periods=0 so that it
-        # is in-sample data only.
-        df = (model
-              .history[['ds', 'y']]
-              .merge(
-                  model.predict(model.make_future_dataframe(periods=0))[['ds', 'yhat']],
-                  how='inner', on='ds',
-              )
-        )
-
-    if 'yhat' not in df.columns:
-        raise ValueError(
-            'Please run cross-validation first before computing quality metrics.')
-
-    return {
-        'ME': me(df),
-        'MSE': mse(df),
-        'RMSE': rmse(df),
-        'MAE': mae(df),
-        'MPE': mpe(df),
-        'MAPE': mape(df),
-    }
+    # Input validation
+    valid_aggregations = ['horizon', 'all', 'none']
+    if aggregation not in valid_aggregations:
+        raise ValueError(
+            'Aggregation {} is not valid; must be one of {}'.format(
+                aggregation, valid_aggregations
+            )
+        )
+    valid_metrics = ['mse', 'mae', 'mape', 'coverage']
+    if metrics is None:
+        metrics = valid_metrics
+    if len(set(metrics)) != len(metrics):
+        raise ValueError('Input metrics must be a list of unique values')
+    if not set(metrics).issubset(set(valid_metrics)):
+        raise ValueError(
+            'Valid values for metrics are: {}'.format(valid_metrics)
+        )
+    # Get the function for each of the requested metrics
+    metric_fns = {m: eval(m) for m in metrics}
+
+    def all_metrics(df_g):
+        return pd.Series({name: fn(df_g) for name, fn in metric_fns.items()})
+
+    # Apply the metric functions at the requested level of aggregation
+    if aggregation == 'all':
+        return all_metrics(df)
+    # else, 'horizon' or 'none': both need the horizon column
+    df_m = df.copy()
+    df_m['horizon'] = df_m['ds'] - df_m['cutoff']
+    if aggregation == 'horizon':
+        return df_m.groupby('horizon').apply(all_metrics).reset_index()
+    # else, aggregation == 'none': attach unaggregated metrics to each row
+    for name, fn in metric_fns.items():
+        df_m[name] = fn(df_m, agg=False)
+    return df_m
+
+
+# The functions below specify performance metrics for cross-validation results.
+# Each takes as input the output of cross_validation, and has two modes of
+# return: if agg=True, returns a float that is the metric aggregated over the
+# input. If agg=False, returns results without aggregation (for
+# aggregation='none' in performance_metrics).
+
+
+def mse(df, agg=True):
+    """Mean squared error
+    """
+    se = (df['y'] - df['yhat']) ** 2
+    if agg:
+        return np.mean(se)
+    return se
+
+
+def mae(df, agg=True):
+    """Mean absolute error
+    """
+    ae = np.abs(df['y'] - df['yhat'])
+    if agg:
+        return np.mean(ae)
+    return ae
+
+
+def mape(df, agg=True):
+    """Mean absolute percent error
+    """
+    ape = np.abs((df['y'] - df['yhat']) / df['y'])
+    if agg:
+        return np.mean(ape)
+    return ape
+
+
+def coverage(df, agg=True):
+    """Coverage of the yhat_lower/yhat_upper interval
+    """
+    is_covered = (df['y'] >= df['yhat_lower']) & (df['y'] <= df['yhat_upper'])
+    if agg:
+        return np.mean(is_covered)
+    return is_covered
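For orientation, a minimal usage sketch of the new entry point follows; it is not part of the commit. The synthetic history is invented for illustration, the import path assumes this repo's python/fbprophet package layout, and the cross_validation arguments are the ones used in the test below.

import numpy as np
import pandas as pd
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics

# Synthetic daily history (invented values), long enough for initial='90 days'.
df = pd.DataFrame({'ds': pd.date_range('2017-01-01', periods=120, freq='D')})
df['y'] = 10 + np.sin(2 * np.pi * df.index / 7) + 0.1 * np.random.randn(120)

m = Prophet()
m.fit(df)
df_cv = cross_validation(
    m, horizon='4 days', period='10 days', initial='90 days')

# Default: one row per horizon value (ds - cutoff), one column per metric.
by_horizon = performance_metrics(df_cv)

# A single Series indexed by metric name, aggregated over all ds/cutoff pairs.
overall = performance_metrics(df_cv, aggregation='all')

# Per-row metrics appended to the cross-validation rows, for a chosen subset.
per_row = performance_metrics(
    df_cv, metrics=['mse', 'coverage'], aggregation='none')

Note the design choice in the diff: metric names are resolved with eval, so the strings accepted in `metrics` stay in sync with the module-level functions of the same names, each of which supports both aggregated (agg=True) and per-row (agg=False) output.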
python/fbprophet/tests/test_diagnostics.py
@@ -135,3 +135,64 @@ class TestDiagnostics(TestCase):
             ((df_cv1['y'] - df_cv2['y']) ** 2).sum(), 0.0)
         self.assertAlmostEqual(
             ((df_cv1['yhat'] - df_cv2['yhat']) ** 2).sum(), 0.0)
+
+    def test_performance_metrics(self):
+        m = Prophet()
+        m.fit(self.__df)
+        df_cv = diagnostics.cross_validation(
+            m, horizon='4 days', period='10 days', initial='90 days')
+        # Aggregation level none
+        df_none = diagnostics.performance_metrics(df_cv, aggregation='none')
+        self.assertEqual(
+            set(df_none.columns),
+            {
+                'y', 'yhat', 'yhat_lower', 'yhat_upper', 'ds', 'cutoff',
+                'horizon', 'coverage', 'mae', 'mape', 'mse',
+            },
+        )
+        # Check each metric
+        self.assertEqual(
+            np.abs(df_cv['yhat'][0] - df_cv['y'][0]),
+            df_none['mae'][0],
+        )
+        self.assertEqual(
+            np.abs((df_cv['yhat'][0] - df_cv['y'][0]) / df_cv['y'][0]),
+            df_none['mape'][0],
+        )
+        self.assertEqual(
+            (df_cv['yhat'][0] - df_cv['y'][0]) ** 2,
+            df_none['mse'][0],
+        )
+        self.assertEqual(
+            (
+                (df_cv['y'][0] >= df_cv['yhat_lower'][0])
+                and (df_cv['y'][0] <= df_cv['yhat_upper'][0])
+            ),
+            df_none['coverage'][0],
+        )
+        # Aggregation level horizon (default)
+        df_horizon = diagnostics.performance_metrics(df_cv)
+        self.assertEqual(len(df_horizon['horizon'].unique()), 4)
+        self.assertEqual(
+            set(df_horizon.columns),
+            {'coverage', 'mse', 'mape', 'mae', 'horizon'},
+        )
+        self.assertEqual(df_horizon.shape[0], 4)
+        # Check aggregation
+        agg = df_none.groupby('horizon', as_index=False).agg('mean')
+        for metric in ['mse', 'mape', 'mae', 'horizon']:
+            self.assertTrue((agg[metric] == df_horizon[metric]).all())
+        # Aggregation level all
+        df_all = diagnostics.performance_metrics(df_cv, aggregation='all')
+        self.assertEqual(df_all.shape, (4,))
+        self.assertEqual(set(df_all.index), {'coverage', 'mse', 'mae', 'mape'})
+        for metric in ['mse', 'mape', 'mae', 'coverage']:
+            self.assertEqual(df_all[metric], df_none[metric].mean())
+        # Custom list of metrics
+        df_horizon = diagnostics.performance_metrics(
+            df_cv, metrics=['coverage', 'mse'],
+        )
+        self.assertEqual(
+            set(df_horizon.columns),
+            {'coverage', 'mse', 'horizon'},
+        )
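To make the horizon grouping that the tests exercise concrete, here is a self-contained sketch with invented toy values; it reproduces by hand the groupby('horizon') aggregation performed inside performance_metrics:

import numpy as np
import pandas as pd

# Toy cross-validation output: two cutoffs, each forecasting 1 and 2 days
# ahead (all values invented for illustration).
df_cv = pd.DataFrame({
    'ds': pd.to_datetime(['2017-01-02', '2017-01-03',
                          '2017-01-12', '2017-01-13']),
    'cutoff': pd.to_datetime(['2017-01-01', '2017-01-01',
                              '2017-01-11', '2017-01-11']),
    'y': [10.0, 11.0, 12.0, 13.0],
    'yhat': [9.0, 12.5, 11.0, 14.0],
})

# The horizon is a Timedelta, so rows with the same lead time are pooled
# across cutoffs before averaging.
horizon = df_cv['ds'] - df_cv['cutoff']
mse_by_horizon = (df_cv['y'] - df_cv['yhat']).pow(2).groupby(horizon).mean()
print(mse_by_horizon)
# 1 days    1.000   -> ((10 - 9)^2 + (12 - 11)^2) / 2
# 2 days    1.625   -> ((11 - 12.5)^2 + (13 - 14)^2) / 2

This pooling is why aggregation='horizon' is a sensible default: each horizon's metric is averaged over every cutoff that reaches that lead time, which is also why the test above expects exactly 4 rows for a '4 days' horizon.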