mirror of
https://github.com/saymrwulf/prophet.git
synced 2026-05-15 20:50:48 +00:00
* adding changes to python error metrics to new branch * adding diagnosts to a new base branch v0.3 * adding code with broken out function definitions for each metric and handling of model for calculating insample and outsample metrics.
212 lines
No EOL
7.1 KiB
Python
212 lines
No EOL
7.1 KiB
Python
# Copyright (c) 2017-present, Facebook, Inc.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the BSD-style license found in the
|
|
# LICENSE file in the root directory of this source tree. An additional grant
|
|
# of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from functools import reduce
|
|
|
|
|
|
def _cutoffs(df, horizon, k, period):
|
|
"""Generate cutoff dates
|
|
|
|
Parameters
|
|
----------
|
|
df: pd.DataFrame with historical data
|
|
horizon: pd.Timedelta.
|
|
Forecast horizon
|
|
k: Int number.
|
|
The number of forecasts point.
|
|
period: pd.Timedelta.
|
|
Simulated Forecast will be done at every this period.
|
|
|
|
Returns
|
|
-------
|
|
list of pd.Timestamp
|
|
"""
|
|
# Last cutoff is 'latest date in data - horizon' date
|
|
cutoff = df['ds'].max() - horizon
|
|
if cutoff < df['ds'].min():
|
|
raise ValueError('Less data than horizon.')
|
|
result = [cutoff]
|
|
|
|
for i in range(1, k):
|
|
cutoff -= period
|
|
# If data does not exist in data range (cutoff, cutoff + horizon]
|
|
if not (((df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)).any()):
|
|
# Next cutoff point is 'last date before cutoff in data - horizon'
|
|
closest_date = df[df['ds'] <= cutoff].max()['ds']
|
|
cutoff = closest_date - horizon
|
|
if cutoff < df['ds'].min():
|
|
logger.warning(
|
|
'Not enough data for requested number of cutoffs! '
|
|
'Using {}.'.format(i))
|
|
break
|
|
result.append(cutoff)
|
|
|
|
# Sort lines in ascending order
|
|
return reversed(result)
|
|
|
|
|
|
def simulated_historical_forecasts(model, horizon, k, period=None):
|
|
"""Simulated Historical Forecasts.
|
|
|
|
Make forecasts from k historical cutoff points, working backwards from
|
|
(end - horizon) with a spacing of period between each cutoff.
|
|
|
|
Parameters
|
|
----------
|
|
model: Prophet class object.
|
|
Fitted Prophet model
|
|
horizon: string with pd.Timedelta compatible style, e.g., '5 days',
|
|
'3 hours', '10 seconds'.
|
|
k: Int number of forecasts point.
|
|
period: Optional string with pd.Timedelta compatible style. Simulated
|
|
forecast will be done at every this period. If not provided,
|
|
0.5 * horizon is used.
|
|
|
|
Returns
|
|
-------
|
|
A pd.DataFrame with the forecast, actual value and cutoff.
|
|
"""
|
|
df = model.history.copy().reset_index(drop=True)
|
|
horizon = pd.Timedelta(horizon)
|
|
period = 0.5 * horizon if period is None else pd.Timedelta(period)
|
|
cutoffs = _cutoffs(df, horizon, k, period)
|
|
predicts = []
|
|
for cutoff in cutoffs:
|
|
# Generate new object with copying fitting options
|
|
m = model.copy(cutoff)
|
|
# Train model
|
|
m.fit(df[df['ds'] <= cutoff])
|
|
# Calculate yhat
|
|
index_predicted = (df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)
|
|
# Get the columns for the future dataframe
|
|
columns = ['ds']
|
|
if m.growth == 'logistic':
|
|
columns.append('cap')
|
|
if m.logistic_floor:
|
|
columns.append('floor')
|
|
columns.extend(m.extra_regressors.keys())
|
|
yhat = m.predict(df[index_predicted][columns])
|
|
# Merge yhat(predicts), y(df, original data) and cutoff
|
|
predicts.append(pd.concat([
|
|
yhat[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
|
|
df[index_predicted][['y']].reset_index(drop=True),
|
|
pd.DataFrame({'cutoff': [cutoff] * len(yhat)})
|
|
], axis=1))
|
|
|
|
# Combine all predicted pd.DataFrame into one pd.DataFrame
|
|
return reduce(lambda x, y: x.append(y), predicts).reset_index(drop=True)
|
|
|
|
|
|
def cross_validation(model, horizon, period=None, initial=None):
|
|
"""Cross-Validation for time series.
|
|
|
|
Computes forecasts from historical cutoff points. Beginning from initial,
|
|
makes cutoffs with a spacing of period up to (end - horizon).
|
|
|
|
When period is equal to the time interval of the data, this is the
|
|
technique described in https://robjhyndman.com/hyndsight/tscv/ .
|
|
|
|
Parameters
|
|
----------
|
|
model: Prophet class object. Fitted Prophet model
|
|
horizon: string with pd.Timedelta compatible style, e.g., '5 days',
|
|
'3 hours', '10 seconds'.
|
|
period: string with pd.Timedelta compatible style. Simulated forecast will
|
|
be done at every this period. If not provided, 0.5 * horizon is used.
|
|
initial: string with pd.Timedelta compatible style. The first training
|
|
period will begin here. If not provided, 3 * horizon is used.
|
|
|
|
Returns
|
|
-------
|
|
A pd.DataFrame with the forecast, actual value and cutoff.
|
|
"""
|
|
te = model.history['ds'].max()
|
|
ts = model.history['ds'].min()
|
|
horizon = pd.Timedelta(horizon)
|
|
period = 0.5 * horizon if period is None else pd.Timedelta(period)
|
|
initial = 3 * horizon if initial is None else pd.Timedelta(initial)
|
|
k = int(np.ceil(((te - horizon) - (ts + initial)) / period))
|
|
if k < 1:
|
|
raise ValueError(
|
|
'Not enough data for specified horizon, period, and initial.')
|
|
return simulated_historical_forecasts(model, horizon, k, period)
|
|
|
|
def me(df):
|
|
return((df['yhat'] - df['y']).sum()/len(df['yhat']))
|
|
def mse(df):
|
|
return((df['yhat'] - df['y']).pow(2).sum()/len(df))
|
|
def rmse(df):
|
|
return(np.sqrt((df['yhat'] - df['y']).pow(2).sum()/len(df)))
|
|
def mae(df):
|
|
return((df['yhat'] - df['y']).abs().sum()/len(df))
|
|
def mpe(df):
|
|
return((df['yhat'] - df['y']).div(df['y']).sum()*(1/len(df)))
|
|
def mape(df):
|
|
return((df['yhat'] - df['y']).div(df['y']).abs().sum()*(1/len(df)))
|
|
|
|
def all_metrics(model, df_cv = None):
|
|
"""Compute model fit metrics for time series.
|
|
|
|
Computes the following metrics about each time series that has been through
|
|
Cross Validation;
|
|
|
|
Mean Error (ME)
|
|
Mean Squared Error (MSE)
|
|
Root Mean Square Error (RMSE,
|
|
Mean Absolute Error (MAE)
|
|
Mean Percentage Error (MPE)
|
|
Mean Absolute Percentage Error (MAPE)
|
|
|
|
Parameters
|
|
----------
|
|
df: A pandas dataframe. Contains y and yhat produced by cross-validation
|
|
|
|
Returns
|
|
-------
|
|
A dictionary where the key = the error type, and value is the value of the error
|
|
"""
|
|
|
|
|
|
|
|
df = []
|
|
|
|
if df_cv is not None:
|
|
df = df_cv
|
|
else:
|
|
# run a forecast on your own data with period = 0 so that it is in-sample data onlyl
|
|
#df = model.predict(model.make_future_dataframe(periods=0))[['y', 'yhat']]
|
|
df = (model
|
|
.history[['ds', 'y']]
|
|
.merge(
|
|
model.predict(model.make_future_dataframe(periods=0))[['ds', 'yhat']],
|
|
how='inner', on='ds'
|
|
)
|
|
)
|
|
|
|
if 'yhat' not in df.columns:
|
|
raise ValueError(
|
|
'Please run Cross-Validation first before computing quality metrics.')
|
|
|
|
return {
|
|
'ME':me(df),
|
|
'MSE':mse(df),
|
|
'RMSE': rmse(df),
|
|
'MAE': mae(df),
|
|
'MPE': mpe(df),
|
|
'MAPE': mape(df)
|
|
} |