2018-12-20 23:33:23 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2019-05-21 18:40:04 +00:00
|
|
|
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
|
|
|
|
|
|
|
|
# This source code is licensed under the MIT license found in the
|
|
|
|
|
# LICENSE file in the root directory of this source tree.
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-12-20 23:33:23 +00:00
|
|
|
from __future__ import absolute_import, division, print_function
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2017-06-21 21:37:07 +00:00
|
|
|
import logging
|
2018-12-20 23:33:23 +00:00
|
|
|
from collections import OrderedDict, defaultdict
|
|
|
|
|
from datetime import timedelta
|
|
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
2018-12-03 23:22:19 +00:00
|
|
|
import pystan # noqa F401
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-05-30 23:35:17 +00:00
|
|
|
from fbprophet.diagnostics import prophet_copy
|
2018-08-27 20:52:34 +00:00
|
|
|
from fbprophet.make_holidays import get_holiday_names, make_holidays_df
|
2018-12-20 23:33:23 +00:00
|
|
|
from fbprophet.models import prophet_stan_model
|
|
|
|
|
from fbprophet.plot import (plot, plot_components, plot_forecast_component,
|
|
|
|
|
plot_seasonality, plot_weekly, plot_yearly,
|
|
|
|
|
seasonality_plot_df)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-12-04 00:29:07 +00:00
|
|
|
# Module-level logger for fbprophet. A NullHandler is attached so that merely
# importing the library never emits "No handlers could be found" warnings when
# the host application has not configured logging itself.
logger = logging.getLogger('fbprophet')
logger.addHandler(logging.NullHandler())
# If our NullHandler is the only handler, nobody else has configured logging,
# so install a sensible INFO-level default configuration.
if len(logger.handlers) == 1:
    logging.basicConfig(level=logging.INFO)
|
2018-05-03 17:23:56 +00:00
|
|
|
|
2017-06-21 21:37:07 +00:00
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
class Prophet(object):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Prophet forecaster.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
growth: String 'linear' or 'logistic' to specify a linear or logistic
|
|
|
|
|
trend.
|
|
|
|
|
changepoints: List of dates at which to include potential changepoints. If
|
|
|
|
|
not specified, potential changepoints are selected automatically.
|
|
|
|
|
n_changepoints: Number of potential changepoints to include. Not used
|
|
|
|
|
if input `changepoints` is supplied. If `changepoints` is not supplied,
|
2017-05-15 22:31:13 +00:00
|
|
|
then n_changepoints potential changepoints are selected uniformly from
|
2018-05-25 23:45:24 +00:00
|
|
|
the first `changepoint_range` proportion of the history.
|
|
|
|
|
changepoint_range: Proportion of history in which trend changepoints will
|
|
|
|
|
be estimated. Defaults to 0.8 for the first 80%. Not used if
|
|
|
|
|
`changepoints` is specified.
|
2018-05-25 22:53:19 +00:00
|
|
|
Not used if input `changepoints` is supplied.
|
2017-07-04 15:24:54 +00:00
|
|
|
yearly_seasonality: Fit yearly seasonality.
|
|
|
|
|
Can be 'auto', True, False, or a number of Fourier terms to generate.
|
|
|
|
|
weekly_seasonality: Fit weekly seasonality.
|
|
|
|
|
Can be 'auto', True, False, or a number of Fourier terms to generate.
|
|
|
|
|
daily_seasonality: Fit daily seasonality.
|
|
|
|
|
Can be 'auto', True, False, or a number of Fourier terms to generate.
|
2017-03-23 15:27:44 +00:00
|
|
|
holidays: pd.DataFrame with columns holiday (string) and ds (date type)
|
|
|
|
|
and optionally columns lower_window and upper_window which specify a
|
|
|
|
|
range of days around the date to be included as holidays.
|
2017-08-27 06:29:10 +00:00
|
|
|
lower_window=-2 will include 2 days prior to the date as holidays. Also
|
|
|
|
|
optionally can have a column prior_scale specifying the prior scale for
|
|
|
|
|
that holiday.
|
2018-05-09 19:25:29 +00:00
|
|
|
seasonality_mode: 'additive' (default) or 'multiplicative'.
|
2017-03-23 15:27:44 +00:00
|
|
|
seasonality_prior_scale: Parameter modulating the strength of the
|
|
|
|
|
seasonality model. Larger values allow the model to fit larger seasonal
|
2017-08-27 21:32:39 +00:00
|
|
|
fluctuations, smaller values dampen the seasonality. Can be specified
|
|
|
|
|
for individual seasonalities using add_seasonality.
|
2017-03-23 15:27:44 +00:00
|
|
|
holidays_prior_scale: Parameter modulating the strength of the holiday
|
2017-08-28 18:38:02 +00:00
|
|
|
components model, unless overridden in the holidays input.
|
2017-03-23 15:27:44 +00:00
|
|
|
changepoint_prior_scale: Parameter modulating the flexibility of the
|
|
|
|
|
automatic changepoint selection. Large values will allow many
|
|
|
|
|
changepoints, small values will allow few changepoints.
|
2017-04-08 02:11:37 +00:00
|
|
|
mcmc_samples: Integer, if greater than 0, will do full Bayesian inference
|
2017-03-23 15:27:44 +00:00
|
|
|
with the specified number of MCMC samples. If 0, will do MAP
|
|
|
|
|
estimation.
|
|
|
|
|
interval_width: Float, width of the uncertainty intervals provided
|
|
|
|
|
for the forecast. If mcmc_samples=0, this will be only the uncertainty
|
|
|
|
|
in the trend using the MAP estimate of the extrapolated generative
|
|
|
|
|
model. If mcmc_samples>0, this will be integrated over all model
|
|
|
|
|
parameters, which will include uncertainty in seasonality.
|
|
|
|
|
uncertainty_samples: Number of simulated draws used to estimate
|
2019-07-23 08:59:28 +00:00
|
|
|
uncertainty intervals. Setting this value to 0 or False will disable
|
|
|
|
|
uncertainty estimation and speed up the calculation.
|
2017-03-23 15:27:44 +00:00
|
|
|
"""
|
|
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
def __init__(
|
|
|
|
|
self,
|
|
|
|
|
growth='linear',
|
|
|
|
|
changepoints=None,
|
|
|
|
|
n_changepoints=25,
|
2018-05-25 23:45:24 +00:00
|
|
|
changepoint_range=0.8,
|
2017-04-13 08:25:03 +00:00
|
|
|
yearly_seasonality='auto',
|
|
|
|
|
weekly_seasonality='auto',
|
2017-07-04 15:24:54 +00:00
|
|
|
daily_seasonality='auto',
|
2017-02-22 23:59:43 +00:00
|
|
|
holidays=None,
|
2018-05-09 19:25:29 +00:00
|
|
|
seasonality_mode='additive',
|
2017-02-22 23:59:43 +00:00
|
|
|
seasonality_prior_scale=10.0,
|
|
|
|
|
holidays_prior_scale=10.0,
|
|
|
|
|
changepoint_prior_scale=0.05,
|
|
|
|
|
mcmc_samples=0,
|
|
|
|
|
interval_width=0.80,
|
|
|
|
|
uncertainty_samples=1000,
|
|
|
|
|
):
|
|
|
|
|
self.growth = growth
|
|
|
|
|
|
|
|
|
|
self.changepoints = pd.to_datetime(changepoints)
|
|
|
|
|
if self.changepoints is not None:
|
|
|
|
|
self.n_changepoints = len(self.changepoints)
|
2017-08-26 21:31:33 +00:00
|
|
|
self.specified_changepoints = True
|
2017-02-22 23:59:43 +00:00
|
|
|
else:
|
|
|
|
|
self.n_changepoints = n_changepoints
|
2017-08-26 21:31:33 +00:00
|
|
|
self.specified_changepoints = False
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-05-25 23:45:24 +00:00
|
|
|
self.changepoint_range = changepoint_range
|
2017-02-22 23:59:43 +00:00
|
|
|
self.yearly_seasonality = yearly_seasonality
|
|
|
|
|
self.weekly_seasonality = weekly_seasonality
|
2017-07-04 15:12:08 +00:00
|
|
|
self.daily_seasonality = daily_seasonality
|
2017-02-22 23:59:43 +00:00
|
|
|
self.holidays = holidays
|
|
|
|
|
|
2018-05-09 19:25:29 +00:00
|
|
|
self.seasonality_mode = seasonality_mode
|
2017-02-22 23:59:43 +00:00
|
|
|
self.seasonality_prior_scale = float(seasonality_prior_scale)
|
|
|
|
|
self.changepoint_prior_scale = float(changepoint_prior_scale)
|
|
|
|
|
self.holidays_prior_scale = float(holidays_prior_scale)
|
|
|
|
|
|
|
|
|
|
self.mcmc_samples = mcmc_samples
|
|
|
|
|
self.interval_width = interval_width
|
|
|
|
|
self.uncertainty_samples = uncertainty_samples
|
|
|
|
|
|
2018-12-01 01:20:22 +00:00
|
|
|
# Set during fitting or by other methods
|
2017-02-22 23:59:43 +00:00
|
|
|
self.start = None
|
|
|
|
|
self.y_scale = None
|
2017-08-28 16:06:00 +00:00
|
|
|
self.logistic_floor = False
|
2017-02-28 08:08:37 +00:00
|
|
|
self.t_scale = None
|
|
|
|
|
self.changepoints_t = None
|
2019-05-03 17:44:23 +00:00
|
|
|
self.seasonalities = OrderedDict({})
|
2018-10-19 01:07:42 +00:00
|
|
|
self.extra_regressors = OrderedDict({})
|
2018-12-01 01:20:22 +00:00
|
|
|
self.country_holidays = None
|
2017-02-22 23:59:43 +00:00
|
|
|
self.stan_fit = None
|
|
|
|
|
self.params = {}
|
|
|
|
|
self.history = None
|
2017-03-23 13:47:29 +00:00
|
|
|
self.history_dates = None
|
2018-05-08 00:09:02 +00:00
|
|
|
self.train_component_cols = None
|
2018-05-12 00:54:29 +00:00
|
|
|
self.component_modes = None
|
2018-08-27 20:52:34 +00:00
|
|
|
self.train_holiday_names = None
|
2017-03-02 14:36:00 +00:00
|
|
|
self.validate_inputs()
|
|
|
|
|
|
|
|
|
|
def validate_inputs(self):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Validates the inputs to Prophet."""
|
2017-03-02 14:36:00 +00:00
|
|
|
if self.growth not in ('linear', 'logistic'):
|
|
|
|
|
raise ValueError(
|
2019-11-19 16:26:12 +00:00
|
|
|
'Parameter "growth" should be "linear" or "logistic".')
|
2018-05-25 23:45:24 +00:00
|
|
|
if ((self.changepoint_range < 0) or (self.changepoint_range > 1)):
|
2019-11-19 16:26:12 +00:00
|
|
|
raise ValueError('Parameter "changepoint_range" must be in [0, 1]')
|
2017-03-02 14:36:00 +00:00
|
|
|
if self.holidays is not None:
|
2018-12-03 19:54:55 +00:00
|
|
|
if not (
|
2018-12-03 23:22:19 +00:00
|
|
|
isinstance(self.holidays, pd.DataFrame)
|
|
|
|
|
and 'ds' in self.holidays # noqa W503
|
|
|
|
|
and 'holiday' in self.holidays # noqa W503
|
2018-12-03 19:54:55 +00:00
|
|
|
):
|
2019-11-19 16:26:12 +00:00
|
|
|
raise ValueError('holidays must be a DataFrame with "ds" and '
|
|
|
|
|
'"holiday" columns.')
|
2018-12-03 23:22:19 +00:00
|
|
|
self.holidays['ds'] = pd.to_datetime(self.holidays['ds'])
|
2017-03-12 14:01:02 +00:00
|
|
|
has_lower = 'lower_window' in self.holidays
|
|
|
|
|
has_upper = 'upper_window' in self.holidays
|
|
|
|
|
if has_lower + has_upper == 1:
|
|
|
|
|
raise ValueError('Holidays must have both lower_window and ' +
|
|
|
|
|
'upper_window, or neither')
|
|
|
|
|
if has_lower:
|
2018-04-21 01:48:21 +00:00
|
|
|
if self.holidays['lower_window'].max() > 0:
|
2017-03-12 14:01:02 +00:00
|
|
|
raise ValueError('Holiday lower_window should be <= 0')
|
2018-04-21 01:48:21 +00:00
|
|
|
if self.holidays['upper_window'].min() < 0:
|
2017-03-12 14:01:02 +00:00
|
|
|
raise ValueError('Holiday upper_window should be >= 0')
|
2017-03-02 14:36:00 +00:00
|
|
|
for h in self.holidays['holiday'].unique():
|
2017-07-21 14:05:16 +00:00
|
|
|
self.validate_column_name(h, check_holidays=False)
|
2018-05-09 19:25:29 +00:00
|
|
|
if self.seasonality_mode not in ['additive', 'multiplicative']:
|
|
|
|
|
raise ValueError(
|
2019-11-19 16:26:12 +00:00
|
|
|
'seasonality_mode must be "additive" or "multiplicative"'
|
2018-05-09 19:25:29 +00:00
|
|
|
)
|
2017-07-21 14:05:16 +00:00
|
|
|
|
|
|
|
|
def validate_column_name(self, name, check_holidays=True,
|
|
|
|
|
check_seasonalities=True, check_regressors=True):
|
|
|
|
|
"""Validates the name of a seasonality, holiday, or regressor.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
name: string
|
|
|
|
|
check_holidays: bool check if name already used for holiday
|
|
|
|
|
check_seasonalities: bool check if name already used for seasonality
|
|
|
|
|
check_regressors: bool check if name already used for regressor
|
|
|
|
|
"""
|
|
|
|
|
if '_delim_' in name:
|
|
|
|
|
raise ValueError('Name cannot contain "_delim_"')
|
|
|
|
|
reserved_names = [
|
2018-05-08 00:09:02 +00:00
|
|
|
'trend', 'additive_terms', 'daily', 'weekly', 'yearly',
|
2018-08-27 20:52:34 +00:00
|
|
|
'holidays', 'zeros', 'extra_regressors_additive', 'yhat',
|
2018-05-12 00:54:29 +00:00
|
|
|
'extra_regressors_multiplicative', 'multiplicative_terms',
|
2017-07-21 14:05:16 +00:00
|
|
|
]
|
|
|
|
|
rn_l = [n + '_lower' for n in reserved_names]
|
|
|
|
|
rn_u = [n + '_upper' for n in reserved_names]
|
|
|
|
|
reserved_names.extend(rn_l)
|
|
|
|
|
reserved_names.extend(rn_u)
|
2017-08-28 16:06:00 +00:00
|
|
|
reserved_names.extend([
|
|
|
|
|
'ds', 'y', 'cap', 'floor', 'y_scaled', 'cap_scaled'])
|
2017-07-21 14:05:16 +00:00
|
|
|
if name in reserved_names:
|
2019-11-19 16:26:12 +00:00
|
|
|
raise ValueError(f'Name "{name}" is reserved.')
|
2017-07-21 14:05:16 +00:00
|
|
|
if (check_holidays and self.holidays is not None and
|
|
|
|
|
name in self.holidays['holiday'].unique()):
|
|
|
|
|
raise ValueError(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Name "{name}" already used for a holiday.')
|
2018-12-01 01:20:22 +00:00
|
|
|
if (check_holidays and self.country_holidays is not None and
|
|
|
|
|
name in get_holiday_names(self.country_holidays)):
|
2018-08-27 20:52:34 +00:00
|
|
|
raise ValueError(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Name "{name}" is a holiday name in {self.country_holidays}.')
|
2017-07-21 14:05:16 +00:00
|
|
|
if check_seasonalities and name in self.seasonalities:
|
|
|
|
|
raise ValueError(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Name "{name}" already used for a seasonality.')
|
2017-07-21 14:05:16 +00:00
|
|
|
if check_regressors and name in self.extra_regressors:
|
|
|
|
|
raise ValueError(
|
2019-11-19 17:35:49 +00:00
|
|
|
f'Name "{name}" already used for an added regressor.')
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
    def setup_dataframe(self, df, initialize_scales=False):
        """Prepare dataframe for fitting or predicting.

        Adds a time index and scales y. Creates auxiliary columns 't', 't_ix',
        'y_scaled', and 'cap_scaled'. These columns are used during both
        fitting and predicting.

        Parameters
        ----------
        df: pd.DataFrame with columns ds, y, and cap if logistic growth. Any
            specified additional regressors must also be present.
        initialize_scales: Boolean set scaling factors in self from df.

        Returns
        -------
        pd.DataFrame prepared for fitting or predicting.
        """
        if 'y' in df:  # 'y' will be in training data
            df['y'] = pd.to_numeric(df['y'])
            # Infinite targets would silently poison the fit downstream.
            if np.isinf(df['y'].values).any():
                raise ValueError('Found infinity in column y.')
        # Integer ds values (e.g. years) are routed through str so that
        # pd.to_datetime parses them as dates rather than as epoch nanos.
        if df['ds'].dtype == np.int64:
            df['ds'] = df['ds'].astype(str)
        df['ds'] = pd.to_datetime(df['ds'])
        if df['ds'].dt.tz is not None:
            raise ValueError(
                'Column ds has timezone specified, which is not supported. '
                'Remove timezone.'
            )
        if df['ds'].isnull().any():
            raise ValueError('Found NaN in column ds.')
        # Every registered extra regressor must be present and numeric.
        for name in self.extra_regressors:
            if name not in df:
                raise ValueError(
                    f'Regressor "{name}" missing from dataframe')
            df[name] = pd.to_numeric(df[name])
            if df[name].isnull().any():
                raise ValueError(f'Found NaN in column {name}')
        # Conditional seasonalities need their boolean indicator column.
        for props in self.seasonalities.values():
            condition_name = props['condition_name']
            if condition_name is not None:
                if condition_name not in df:
                    raise ValueError(
                        f'Condition "{condition_name}" missing from dataframe')
                if not df[condition_name].isin([True, False]).all():
                    raise ValueError(
                        f'Found non-boolean in column {condition_name}')
                df[condition_name] = df[condition_name].astype('bool')

        # An index named 'ds' would collide with the 'ds' column on
        # reset_index, so drop the name first.
        if df.index.name == 'ds':
            df.index.name = None
        df = df.sort_values('ds')
        df = df.reset_index(drop=True)

        # No-op unless initialize_scales is True (i.e. during fitting).
        self.initialize_scales(initialize_scales, df)

        if self.logistic_floor:
            # A floor was supplied at fit time, so predictions need one too.
            if 'floor' not in df:
                raise ValueError('Expected column "floor".')
        else:
            df['floor'] = 0
        if self.growth == 'logistic':
            if 'cap' not in df:
                raise ValueError(
                    'Capacities must be supplied for logistic growth in '
                    'column "cap"'
                )
            if (df['cap'] <= df['floor']).any():
                raise ValueError(
                    'cap must be greater than floor (which defaults to 0).'
                )
            df['cap_scaled'] = (df['cap'] - df['floor']) / self.y_scale

        # t runs from 0 (history start) to 1 (history end).
        df['t'] = (df['ds'] - self.start) / self.t_scale
        if 'y' in df:
            df['y_scaled'] = (df['y'] - df['floor']) / self.y_scale

        # Standardize extra regressors with the scales fixed at fit time.
        for name, props in self.extra_regressors.items():
            df[name] = ((df[name] - props['mu']) / props['std'])
        return df
|
|
|
|
|
|
2017-08-28 18:38:02 +00:00
|
|
|
def initialize_scales(self, initialize_scales, df):
|
2017-09-01 19:27:23 +00:00
|
|
|
"""Initialize model scales.
|
|
|
|
|
|
|
|
|
|
Sets model scaling factors using df.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
initialize_scales: Boolean set the scales or not.
|
|
|
|
|
df: pd.DataFrame for setting scales.
|
|
|
|
|
"""
|
2017-08-28 18:38:02 +00:00
|
|
|
if not initialize_scales:
|
|
|
|
|
return
|
|
|
|
|
if self.growth == 'logistic' and 'floor' in df:
|
|
|
|
|
self.logistic_floor = True
|
|
|
|
|
floor = df['floor']
|
|
|
|
|
else:
|
|
|
|
|
floor = 0.
|
|
|
|
|
self.y_scale = (df['y'] - floor).abs().max()
|
|
|
|
|
if self.y_scale == 0:
|
|
|
|
|
self.y_scale = 1
|
|
|
|
|
self.start = df['ds'].min()
|
|
|
|
|
self.t_scale = df['ds'].max() - self.start
|
|
|
|
|
for name, props in self.extra_regressors.items():
|
|
|
|
|
standardize = props['standardize']
|
2017-11-08 18:09:08 +00:00
|
|
|
n_vals = len(df[name].unique())
|
|
|
|
|
if n_vals < 2:
|
2018-06-01 21:53:45 +00:00
|
|
|
standardize = False
|
2017-08-28 18:38:02 +00:00
|
|
|
if standardize == 'auto':
|
|
|
|
|
if set(df[name].unique()) == set([1, 0]):
|
2019-11-19 16:26:12 +00:00
|
|
|
standardize = False # Don't standardize binary variables.
|
2017-08-28 18:38:02 +00:00
|
|
|
else:
|
|
|
|
|
standardize = True
|
|
|
|
|
if standardize:
|
|
|
|
|
mu = df[name].mean()
|
|
|
|
|
std = df[name].std()
|
|
|
|
|
self.extra_regressors[name]['mu'] = mu
|
|
|
|
|
self.extra_regressors[name]['std'] = std
|
|
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
def set_changepoints(self):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Set changepoints
|
|
|
|
|
|
|
|
|
|
Sets m$changepoints to the dates of changepoints. Either:
|
|
|
|
|
1) The changepoints were passed in explicitly.
|
|
|
|
|
A) They are empty.
|
|
|
|
|
B) They are not empty, and need validation.
|
|
|
|
|
2) We are generating a grid of them.
|
|
|
|
|
3) The user prefers no changepoints be used.
|
2017-02-22 23:59:43 +00:00
|
|
|
"""
|
|
|
|
|
if self.changepoints is not None:
|
|
|
|
|
if len(self.changepoints) == 0:
|
|
|
|
|
pass
|
|
|
|
|
else:
|
|
|
|
|
too_low = min(self.changepoints) < self.history['ds'].min()
|
|
|
|
|
too_high = max(self.changepoints) > self.history['ds'].max()
|
|
|
|
|
if too_low or too_high:
|
2017-08-19 18:20:53 +00:00
|
|
|
raise ValueError(
|
|
|
|
|
'Changepoints must fall within training data.')
|
2017-02-22 23:59:43 +00:00
|
|
|
else:
|
2018-05-25 23:45:24 +00:00
|
|
|
# Place potential changepoints evenly through first
|
2019-11-19 18:23:22 +00:00
|
|
|
# `changepoint_range` proportion of the history
|
|
|
|
|
hist_size = int(np.floor(self.history.shape[0]
|
|
|
|
|
* self.changepoint_range))
|
2017-08-19 18:20:53 +00:00
|
|
|
if self.n_changepoints + 1 > hist_size:
|
|
|
|
|
self.n_changepoints = hist_size - 1
|
|
|
|
|
logger.info(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'n_changepoints greater than number of observations. '
|
|
|
|
|
'Using {self.n_changepoints}.'
|
2017-08-19 18:20:53 +00:00
|
|
|
)
|
|
|
|
|
if self.n_changepoints > 0:
|
|
|
|
|
cp_indexes = (
|
2018-05-26 00:00:27 +00:00
|
|
|
np.linspace(0, hist_size - 1, self.n_changepoints + 1)
|
2018-08-27 20:52:34 +00:00
|
|
|
.round()
|
|
|
|
|
.astype(np.int)
|
2017-08-19 18:20:53 +00:00
|
|
|
)
|
2017-08-20 04:26:59 +00:00
|
|
|
self.changepoints = (
|
|
|
|
|
self.history.iloc[cp_indexes]['ds'].tail(-1)
|
|
|
|
|
)
|
2017-08-19 18:20:53 +00:00
|
|
|
else:
|
|
|
|
|
# set empty changepoints
|
|
|
|
|
self.changepoints = []
|
2017-02-28 08:08:37 +00:00
|
|
|
if len(self.changepoints) > 0:
|
|
|
|
|
self.changepoints_t = np.sort(np.array(
|
|
|
|
|
(self.changepoints - self.start) / self.t_scale))
|
2017-02-22 23:59:43 +00:00
|
|
|
else:
|
2017-02-28 08:08:37 +00:00
|
|
|
self.changepoints_t = np.array([0]) # dummy changepoint
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def fourier_series(dates, period, series_order):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Provides Fourier series components with the specified frequency
|
|
|
|
|
and order.
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
2017-03-23 15:27:44 +00:00
|
|
|
dates: pd.Series containing timestamps.
|
|
|
|
|
period: Number of days of the period.
|
|
|
|
|
series_order: Number of components.
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2017-03-23 15:27:44 +00:00
|
|
|
Matrix with seasonality features.
|
2017-02-22 23:59:43 +00:00
|
|
|
"""
|
|
|
|
|
# convert to days since epoch
|
|
|
|
|
t = np.array(
|
|
|
|
|
(dates - pd.datetime(1970, 1, 1))
|
2018-08-27 20:52:34 +00:00
|
|
|
.dt.total_seconds()
|
|
|
|
|
.astype(np.float)
|
2017-07-04 15:24:54 +00:00
|
|
|
) / (3600 * 24.)
|
2017-02-22 23:59:43 +00:00
|
|
|
return np.column_stack([
|
|
|
|
|
fun((2.0 * (i + 1) * np.pi * t / period))
|
|
|
|
|
for i in range(series_order)
|
|
|
|
|
for fun in (np.sin, np.cos)
|
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def make_seasonality_features(cls, dates, period, series_order, prefix):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Data frame with seasonality features.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
cls: Prophet class.
|
|
|
|
|
dates: pd.Series containing timestamps.
|
|
|
|
|
period: Number of days of the period.
|
|
|
|
|
series_order: Number of components.
|
|
|
|
|
prefix: Column name prefix.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
pd.DataFrame with seasonality features.
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
features = cls.fourier_series(dates, period, series_order)
|
|
|
|
|
columns = [
|
2017-03-02 14:36:00 +00:00
|
|
|
'{}_delim_{}'.format(prefix, i + 1)
|
2017-02-22 23:59:43 +00:00
|
|
|
for i in range(features.shape[1])
|
|
|
|
|
]
|
|
|
|
|
return pd.DataFrame(features, columns=columns)
|
|
|
|
|
|
2018-12-01 01:20:22 +00:00
|
|
|
def construct_holiday_dataframe(self, dates):
|
|
|
|
|
"""Construct a dataframe of holiday dates.
|
2018-12-20 23:33:23 +00:00
|
|
|
|
2018-12-01 01:20:22 +00:00
|
|
|
Will combine self.holidays with the built-in country holidays
|
|
|
|
|
corresponding to input dates, if self.country_holidays is set.
|
2018-12-20 23:33:23 +00:00
|
|
|
|
2017-03-23 15:27:44 +00:00
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
dates: pd.Series containing timestamps used for computing seasonality.
|
2018-12-20 23:33:23 +00:00
|
|
|
|
2017-03-23 15:27:44 +00:00
|
|
|
Returns
|
|
|
|
|
-------
|
2018-12-01 07:12:19 +00:00
|
|
|
dataframe of holiday dates, in holiday dataframe format used in
|
|
|
|
|
initialization.
|
2017-02-22 23:59:43 +00:00
|
|
|
"""
|
2018-12-01 01:20:22 +00:00
|
|
|
all_holidays = pd.DataFrame()
|
|
|
|
|
if self.holidays is not None:
|
2018-12-01 07:12:19 +00:00
|
|
|
all_holidays = self.holidays.copy()
|
2018-12-01 01:20:22 +00:00
|
|
|
if self.country_holidays is not None:
|
2018-08-27 20:52:34 +00:00
|
|
|
year_list = list({x.year for x in dates})
|
2018-12-01 01:20:22 +00:00
|
|
|
country_holidays_df = make_holidays_df(
|
|
|
|
|
year_list=year_list, country=self.country_holidays
|
|
|
|
|
)
|
2019-11-19 16:26:12 +00:00
|
|
|
all_holidays = pd.concat((all_holidays, country_holidays_df),
|
|
|
|
|
sort=False)
|
2018-08-27 20:52:34 +00:00
|
|
|
all_holidays.reset_index(drop=True, inplace=True)
|
2019-11-19 17:35:49 +00:00
|
|
|
# Drop future holidays not previously seen in training data
|
2018-08-27 20:52:34 +00:00
|
|
|
if self.train_holiday_names is not None:
|
|
|
|
|
# Remove holiday names didn't show up in fit
|
|
|
|
|
index_to_drop = all_holidays.index[
|
2018-12-01 01:20:22 +00:00
|
|
|
np.logical_not(
|
|
|
|
|
all_holidays.holiday.isin(self.train_holiday_names)
|
|
|
|
|
)
|
|
|
|
|
]
|
2018-08-27 20:52:34 +00:00
|
|
|
all_holidays = all_holidays.drop(index_to_drop)
|
2018-12-01 01:20:22 +00:00
|
|
|
# Add holiday names in fit but not in predict with ds as NA
|
|
|
|
|
holidays_to_add = pd.DataFrame({
|
|
|
|
|
'holiday': self.train_holiday_names[
|
2019-11-19 16:26:12 +00:00
|
|
|
np.logical_not(self.train_holiday_names
|
|
|
|
|
.isin(all_holidays.holiday))
|
2018-12-01 01:20:22 +00:00
|
|
|
]
|
|
|
|
|
})
|
2019-11-19 16:26:12 +00:00
|
|
|
all_holidays = pd.concat((all_holidays, holidays_to_add),
|
|
|
|
|
sort=False)
|
2018-08-27 20:52:34 +00:00
|
|
|
all_holidays.reset_index(drop=True, inplace=True)
|
2018-12-01 01:20:22 +00:00
|
|
|
return all_holidays
|
2018-12-01 07:12:19 +00:00
|
|
|
|
2018-12-01 01:20:22 +00:00
|
|
|
    def make_holiday_features(self, dates, holidays):
        """Construct a dataframe of holiday features.

        Parameters
        ----------
        dates: pd.Series containing timestamps used for computing seasonality.
        holidays: pd.Dataframe containing holidays, as returned by
            construct_holiday_dataframe.

        Returns
        -------
        holiday_features: pd.DataFrame with a column for each holiday.
        prior_scale_list: List of prior scales for each holiday column.
        holiday_names: List of names of holidays
        """
        # Holds columns of our future matrix.
        # Each entry is an indicator vector over `dates`; missing keys start
        # as all-zeros thanks to the defaultdict factory.
        expanded_holidays = defaultdict(lambda: np.zeros(dates.shape[0]))
        prior_scales = {}
        # Makes an index so we can perform `get_loc` below.
        # Strip to just dates.
        row_index = pd.DatetimeIndex(dates.apply(lambda x: x.date()))

        for _ix, row in holidays.iterrows():
            dt = row.ds.date()
            try:
                lw = int(row.get('lower_window', 0))
                uw = int(row.get('upper_window', 0))
            except ValueError:
                # int(NaN-like) raises ValueError; treat a missing window as 0.
                lw = 0
                uw = 0
            ps = float(row.get('prior_scale', self.holidays_prior_scale))
            if np.isnan(ps):
                ps = float(self.holidays_prior_scale)
            # A holiday appearing on multiple rows must use one prior scale.
            if row.holiday in prior_scales and prior_scales[row.holiday] != ps:
                raise ValueError(
                    f'Holiday {row.holiday} does not have consistent prior '
                    'scale specification.')
            if ps <= 0:
                raise ValueError('Prior scale must be > 0')
            prior_scales[row.holiday] = ps

            # One indicator column per (holiday, day-offset) combination.
            for offset in range(lw, uw + 1):
                occurrence = dt + timedelta(days=offset)
                try:
                    loc = row_index.get_loc(occurrence)
                except KeyError:
                    # Occurrence falls outside `dates`; still register the
                    # column below so train/predict frames stay aligned.
                    loc = None
                key = '{}_delim_{}{}'.format(
                    row.holiday,
                    '+' if offset >= 0 else '-',
                    abs(offset)
                )
                if loc is not None:
                    expanded_holidays[key][loc] = 1.
                else:
                    expanded_holidays[key]  # Access key to generate value
        holiday_features = pd.DataFrame(expanded_holidays)
        # Make sure column order is consistent
        holiday_features = holiday_features[sorted(holiday_features.columns
                                                   .tolist())]
        # Prior scale per column, keyed by the holiday-name part of the
        # '<holiday>_delim_<offset>' column name.
        prior_scale_list = [
            prior_scales[h.split('_delim_')[0]]
            for h in holiday_features.columns
        ]
        holiday_names = list(prior_scales.keys())
        # Store holiday names used in fit
        if self.train_holiday_names is None:
            self.train_holiday_names = pd.Series(holiday_names)
        return holiday_features, prior_scale_list, holiday_names
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2019-11-19 16:26:12 +00:00
|
|
|
def add_regressor(self, name, prior_scale=None, standardize='auto',
|
|
|
|
|
mode=None):
|
2017-07-21 14:05:16 +00:00
|
|
|
"""Add an additional regressor to be used for fitting and predicting.
|
|
|
|
|
|
|
|
|
|
The dataframe passed to `fit` and `predict` will have a column with the
|
|
|
|
|
specified name to be used as a regressor. When standardize='auto', the
|
|
|
|
|
regressor will be standardized unless it is binary. The regression
|
|
|
|
|
coefficient is given a prior with the specified scale parameter.
|
|
|
|
|
Decreasing the prior scale will add additional regularization. If no
|
|
|
|
|
prior scale is provided, self.holidays_prior_scale will be used.
|
2018-05-09 19:25:29 +00:00
|
|
|
Mode can be specified as either 'additive' or 'multiplicative'. If not
|
|
|
|
|
specified, self.seasonality_mode will be used. 'additive' means the
|
|
|
|
|
effect of the regressor will be added to the trend, 'multiplicative'
|
|
|
|
|
means it will multiply the trend.
|
2017-07-21 14:05:16 +00:00
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
name: string name of the regressor.
|
|
|
|
|
prior_scale: optional float scale for the normal prior. If not
|
|
|
|
|
provided, self.holidays_prior_scale will be used.
|
|
|
|
|
standardize: optional, specify whether this regressor will be
|
|
|
|
|
standardized prior to fitting. Can be 'auto' (standardize if not
|
|
|
|
|
binary), True, or False.
|
2018-05-09 19:25:29 +00:00
|
|
|
mode: optional, 'additive' or 'multiplicative'. Defaults to
|
|
|
|
|
self.seasonality_mode.
|
2017-07-21 14:05:16 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
The prophet object.
|
|
|
|
|
"""
|
|
|
|
|
if self.history is not None:
|
|
|
|
|
raise Exception(
|
|
|
|
|
"Regressors must be added prior to model fitting.")
|
|
|
|
|
self.validate_column_name(name, check_regressors=False)
|
|
|
|
|
if prior_scale is None:
|
|
|
|
|
prior_scale = float(self.holidays_prior_scale)
|
2018-05-09 19:25:29 +00:00
|
|
|
if mode is None:
|
|
|
|
|
mode = self.seasonality_mode
|
2018-05-30 21:36:36 +00:00
|
|
|
if prior_scale <= 0:
|
|
|
|
|
raise ValueError('Prior scale must be > 0')
|
2018-05-09 19:25:29 +00:00
|
|
|
if mode not in ['additive', 'multiplicative']:
|
|
|
|
|
raise ValueError("mode must be 'additive' or 'multiplicative'")
|
2017-07-21 14:05:16 +00:00
|
|
|
self.extra_regressors[name] = {
|
|
|
|
|
'prior_scale': prior_scale,
|
|
|
|
|
'standardize': standardize,
|
|
|
|
|
'mu': 0.,
|
|
|
|
|
'std': 1.,
|
2018-05-09 19:25:29 +00:00
|
|
|
'mode': mode,
|
2017-07-21 14:05:16 +00:00
|
|
|
}
|
|
|
|
|
return self
|
|
|
|
|
|
2019-11-19 16:26:12 +00:00
|
|
|
def add_seasonality(self, name, period, fourier_order, prior_scale=None,
|
|
|
|
|
mode=None, condition_name=None):
|
2017-08-27 21:32:39 +00:00
|
|
|
"""Add a seasonal component with specified period, number of Fourier
|
|
|
|
|
components, and prior scale.
|
2017-07-05 05:39:57 +00:00
|
|
|
|
|
|
|
|
Increasing the number of Fourier components allows the seasonality to
|
2017-07-30 00:36:03 +00:00
|
|
|
change more quickly (at risk of overfitting). Default values for yearly
|
|
|
|
|
and weekly seasonalities are 10 and 3 respectively.
|
2017-07-05 05:39:57 +00:00
|
|
|
|
2017-08-27 21:32:39 +00:00
|
|
|
Increasing prior scale will allow this seasonality component more
|
|
|
|
|
flexibility, decreasing will dampen it. If not provided, will use the
|
|
|
|
|
seasonality_prior_scale provided on Prophet initialization (defaults
|
|
|
|
|
to 10).
|
|
|
|
|
|
2018-05-09 19:25:29 +00:00
|
|
|
Mode can be specified as either 'additive' or 'multiplicative'. If not
|
|
|
|
|
specified, self.seasonality_mode will be used (defaults to additive).
|
|
|
|
|
Additive means the seasonality will be added to the trend,
|
|
|
|
|
multiplicative means it will multiply the trend.
|
|
|
|
|
|
2019-11-19 16:26:12 +00:00
|
|
|
If condition_name is provided, the dataframe passed to `fit` and
|
|
|
|
|
`predict` should have a column with the specified condition_name
|
|
|
|
|
containing booleans which decides when to apply seasonality.
|
2019-03-18 17:04:35 +00:00
|
|
|
|
2017-07-05 05:39:57 +00:00
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
name: string name of the seasonality component.
|
|
|
|
|
period: float number of days in one period.
|
|
|
|
|
fourier_order: int number of Fourier components to use.
|
2018-05-09 19:25:29 +00:00
|
|
|
prior_scale: optional float prior scale for this component.
|
|
|
|
|
mode: optional 'additive' or 'multiplicative'
|
2019-03-18 17:04:35 +00:00
|
|
|
condition_name: string name of the seasonality condition.
|
2017-07-21 14:05:16 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
The prophet object.
|
2017-07-05 05:39:57 +00:00
|
|
|
"""
|
2017-07-21 14:05:16 +00:00
|
|
|
if self.history is not None:
|
|
|
|
|
raise Exception(
|
2019-11-19 16:26:12 +00:00
|
|
|
'Seasonality must be added prior to model fitting.')
|
2017-07-21 14:05:16 +00:00
|
|
|
if name not in ['daily', 'weekly', 'yearly']:
|
|
|
|
|
# Allow overwriting built-in seasonalities
|
|
|
|
|
self.validate_column_name(name, check_seasonalities=False)
|
2017-08-27 21:32:39 +00:00
|
|
|
if prior_scale is None:
|
|
|
|
|
ps = self.seasonality_prior_scale
|
|
|
|
|
else:
|
|
|
|
|
ps = float(prior_scale)
|
|
|
|
|
if ps <= 0:
|
|
|
|
|
raise ValueError('Prior scale must be > 0')
|
2019-05-28 20:51:01 +00:00
|
|
|
if fourier_order <= 0:
|
|
|
|
|
raise ValueError('Fourier Order must be > 0')
|
2018-05-09 19:25:29 +00:00
|
|
|
if mode is None:
|
|
|
|
|
mode = self.seasonality_mode
|
|
|
|
|
if mode not in ['additive', 'multiplicative']:
|
2019-11-19 16:26:12 +00:00
|
|
|
raise ValueError('mode must be "additive" or "multiplicative"')
|
2019-03-18 17:04:35 +00:00
|
|
|
if condition_name is not None:
|
|
|
|
|
self.validate_column_name(condition_name)
|
2017-08-27 21:32:39 +00:00
|
|
|
self.seasonalities[name] = {
|
|
|
|
|
'period': period,
|
|
|
|
|
'fourier_order': fourier_order,
|
|
|
|
|
'prior_scale': ps,
|
2018-05-09 19:25:29 +00:00
|
|
|
'mode': mode,
|
2019-03-18 17:04:35 +00:00
|
|
|
'condition_name': condition_name,
|
2017-08-27 21:32:39 +00:00
|
|
|
}
|
2017-07-21 14:05:16 +00:00
|
|
|
return self
|
2017-07-05 05:39:57 +00:00
|
|
|
|
2018-12-01 01:20:22 +00:00
|
|
|
def add_country_holidays(self, country_name):
|
|
|
|
|
"""Add in built-in holidays for the specified country.
|
|
|
|
|
|
|
|
|
|
These holidays will be included in addition to any specified on model
|
|
|
|
|
initialization.
|
|
|
|
|
|
|
|
|
|
Holidays will be calculated for arbitrary date ranges in the history
|
|
|
|
|
and future. See the online documentation for the list of countries with
|
|
|
|
|
built-in holidays.
|
|
|
|
|
|
|
|
|
|
Built-in country holidays can only be set for a single country.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
country_name: Name of the country, like 'UnitedStates' or 'US'
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
The prophet object.
|
|
|
|
|
"""
|
|
|
|
|
if self.history is not None:
|
|
|
|
|
raise Exception(
|
|
|
|
|
"Country holidays must be added prior to model fitting."
|
|
|
|
|
)
|
|
|
|
|
# Validate names.
|
|
|
|
|
for name in get_holiday_names(country_name):
|
|
|
|
|
# Allow merging with existing holidays
|
|
|
|
|
self.validate_column_name(name, check_holidays=False)
|
|
|
|
|
# Set the holidays.
|
|
|
|
|
if self.country_holidays is not None:
|
|
|
|
|
logger.warning(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Changing country holidays from {self.country_holidays} to '
|
|
|
|
|
'{country_name}.'
|
2018-12-01 01:20:22 +00:00
|
|
|
)
|
|
|
|
|
self.country_holidays = country_name
|
|
|
|
|
return self
|
|
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
    def make_all_seasonality_features(self, df):
        """Dataframe with seasonality features.

        Includes seasonality features, holiday features, and added regressors.

        Parameters
        ----------
        df: pd.DataFrame with dates for computing seasonality features and any
            added regressors.

        Returns
        -------
        pd.DataFrame with regression features.
        list of prior scales for each column of the features dataframe.
        Dataframe with indicators for which regression components correspond to
            which columns.
        Dictionary with keys 'additive' and 'multiplicative' listing the
            component names for each mode of seasonality.
        """
        seasonal_features = []
        prior_scales = []
        # Track which named components belong to each seasonality mode.
        modes = {'additive': [], 'multiplicative': []}

        # Seasonality features: one Fourier-expansion block per seasonality.
        for name, props in self.seasonalities.items():
            features = self.make_seasonality_features(
                df['ds'],
                props['period'],
                props['fourier_order'],
                name,
            )
            if props['condition_name'] is not None:
                # Zero out feature rows where the boolean condition column is
                # False, so the seasonality only applies when active.
                features[~df[props['condition_name']]] = 0
            seasonal_features.append(features)
            # All Fourier columns of one seasonality share its prior scale.
            prior_scales.extend(
                [props['prior_scale']] * features.shape[1])
            modes[props['mode']].append(name)

        # Holiday features (user-specified plus built-in country holidays).
        holidays = self.construct_holiday_dataframe(df['ds'])
        if len(holidays) > 0:
            features, holiday_priors, holiday_names = (
                self.make_holiday_features(df['ds'], holidays)
            )
            seasonal_features.append(features)
            prior_scales.extend(holiday_priors)
            # Holidays follow the model-level seasonality mode.
            modes[self.seasonality_mode].extend(holiday_names)

        # Additional regressors, passed through as single columns.
        for name, props in self.extra_regressors.items():
            seasonal_features.append(pd.DataFrame(df[name]))
            prior_scales.append(props['prior_scale'])
            modes[props['mode']].append(name)

        # Dummy to prevent empty X
        if len(seasonal_features) == 0:
            seasonal_features.append(
                pd.DataFrame({'zeros': np.zeros(df.shape[0])}))
            prior_scales.append(1.)

        seasonal_features = pd.concat(seasonal_features, axis=1)
        component_cols, modes = self.regressor_column_matrix(
            seasonal_features, modes
        )
        return seasonal_features, prior_scales, component_cols, modes
|
|
|
|
|
|
|
|
|
|
    def regressor_column_matrix(self, seasonal_features, modes):
        """Dataframe indicating which columns of the feature matrix correspond
        to which seasonality/regressor components.

        Includes combination components, like 'additive_terms'. These
        combination components will be added to the 'modes' input.

        Parameters
        ----------
        seasonal_features: Constructed seasonal features dataframe
        modes: Dictionary with keys 'additive' and 'multiplicative' listing the
            component names for each mode of seasonality.

        Returns
        -------
        component_cols: A binary indicator dataframe with columns seasonal
            components and rows columns in seasonal_features. Entry is 1 if
            that columns is used in that component.
        modes: Updated input with combination components.
        """
        # Feature column names are '<component>_delim_<suffix>'; recover the
        # component name from the prefix.
        components = pd.DataFrame({
            'col': np.arange(seasonal_features.shape[1]),
            'component': [
                x.split('_delim_')[0] for x in seasonal_features.columns
            ],
        })
        # Add total for holidays
        if self.train_holiday_names is not None:
            components = self.add_group_component(
                components, 'holidays', self.train_holiday_names.unique())
        # Add totals additive and multiplicative components, and regressors
        for mode in ['additive', 'multiplicative']:
            components = self.add_group_component(
                components, mode + '_terms', modes[mode]
            )
            regressors_by_mode = [
                r for r, props in self.extra_regressors.items()
                if props['mode'] == mode
            ]
            components = self.add_group_component(
                components, 'extra_regressors_' + mode, regressors_by_mode)
            # Add combination components to modes
            modes[mode].append(mode + '_terms')
            modes[mode].append('extra_regressors_' + mode)
        # After all of the additive/multiplicative groups have been added,
        # the 'holidays' total follows the model-level seasonality mode.
        modes[self.seasonality_mode].append('holidays')
        # Convert to a binary matrix
        component_cols = pd.crosstab(
            components['col'], components['component'],
        ).sort_index(level='col')
        # Add columns for additive and multiplicative terms, if missing
        for name in ['additive_terms', 'multiplicative_terms']:
            if name not in component_cols:
                component_cols[name] = 0
        # Remove the placeholder
        component_cols.drop('zeros', axis=1, inplace=True, errors='ignore')
        # Validation: every feature column must belong to exactly one of the
        # additive or multiplicative totals, never both.
        if (max(component_cols['additive_terms']
            + component_cols['multiplicative_terms']) > 1):
            raise Exception('A bug occurred in seasonal components.')
        # Compare to the training, if set.
        if self.train_component_cols is not None:
            component_cols = component_cols[self.train_component_cols.columns]
            if not component_cols.equals(self.train_component_cols):
                raise Exception('A bug occurred in constructing regressors.')
        return component_cols, modes
|
|
|
|
|
|
|
|
|
|
def add_group_component(self, components, name, group):
|
|
|
|
|
"""Adds a component with given name that contains all of the components
|
|
|
|
|
in group.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
components: Dataframe with components.
|
|
|
|
|
name: Name of new group component.
|
|
|
|
|
group: List of components that form the group.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Dataframe with components.
|
|
|
|
|
"""
|
|
|
|
|
new_comp = components[components['component'].isin(set(group))].copy()
|
2018-05-12 00:54:29 +00:00
|
|
|
group_cols = new_comp['col'].unique()
|
|
|
|
|
if len(group_cols) > 0:
|
2018-05-15 17:07:38 +00:00
|
|
|
new_comp = pd.DataFrame({'col': group_cols, 'component': name})
|
2018-05-12 00:54:29 +00:00
|
|
|
components = components.append(new_comp)
|
2018-05-09 19:25:29 +00:00
|
|
|
return components
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2017-07-04 18:06:03 +00:00
|
|
|
def parse_seasonality_args(self, name, arg, auto_disable, default_order):
|
|
|
|
|
"""Get number of fourier components for built-in seasonalities.
|
2017-07-11 23:51:29 +00:00
|
|
|
|
2017-07-04 18:06:03 +00:00
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
name: string name of the seasonality component.
|
|
|
|
|
arg: 'auto', True, False, or number of fourier components as provided.
|
|
|
|
|
auto_disable: bool if seasonality should be disabled when 'auto'.
|
|
|
|
|
default_order: int default fourier order
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Number of fourier components, or 0 for disabled.
|
|
|
|
|
"""
|
|
|
|
|
if arg == 'auto':
|
|
|
|
|
fourier_order = 0
|
|
|
|
|
if name in self.seasonalities:
|
|
|
|
|
logger.info(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Found custom seasonality named "{name}", '
|
|
|
|
|
'disabling built-in {name} seasonality.'
|
2017-07-04 18:06:03 +00:00
|
|
|
)
|
|
|
|
|
elif auto_disable:
|
|
|
|
|
logger.info(
|
2019-11-19 16:26:12 +00:00
|
|
|
f'Disabling {name} seasonality. Run prophet with '
|
|
|
|
|
'{name}_seasonality=True to override this.'
|
2017-07-04 18:06:03 +00:00
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
fourier_order = default_order
|
|
|
|
|
elif arg is True:
|
|
|
|
|
fourier_order = default_order
|
|
|
|
|
elif arg is False:
|
|
|
|
|
fourier_order = 0
|
|
|
|
|
else:
|
|
|
|
|
fourier_order = int(arg)
|
|
|
|
|
return fourier_order
|
|
|
|
|
|
2017-04-13 08:25:03 +00:00
|
|
|
def set_auto_seasonalities(self):
|
|
|
|
|
"""Set seasonalities that were left on auto.
|
|
|
|
|
|
|
|
|
|
Turns on yearly seasonality if there is >=2 years of history.
|
|
|
|
|
Turns on weekly seasonality if there is >=2 weeks of history, and the
|
|
|
|
|
spacing between dates in the history is <7 days.
|
2017-07-04 18:06:03 +00:00
|
|
|
Turns on daily seasonality if there is >=2 days of history, and the
|
|
|
|
|
spacing between dates in the history is <1 day.
|
2017-04-13 08:25:03 +00:00
|
|
|
"""
|
|
|
|
|
first = self.history['ds'].min()
|
|
|
|
|
last = self.history['ds'].max()
|
2017-07-04 18:06:03 +00:00
|
|
|
dt = self.history['ds'].diff()
|
2019-05-13 20:51:27 +00:00
|
|
|
min_dt = dt.iloc[dt.values.nonzero()[0]].min()
|
2017-07-04 18:06:03 +00:00
|
|
|
|
|
|
|
|
# Yearly seasonality
|
|
|
|
|
yearly_disable = last - first < pd.Timedelta(days=730)
|
|
|
|
|
fourier_order = self.parse_seasonality_args(
|
|
|
|
|
'yearly', self.yearly_seasonality, yearly_disable, 10)
|
|
|
|
|
if fourier_order > 0:
|
2017-08-27 21:32:39 +00:00
|
|
|
self.seasonalities['yearly'] = {
|
|
|
|
|
'period': 365.25,
|
|
|
|
|
'fourier_order': fourier_order,
|
|
|
|
|
'prior_scale': self.seasonality_prior_scale,
|
2018-05-09 19:25:29 +00:00
|
|
|
'mode': self.seasonality_mode,
|
2019-03-18 17:04:35 +00:00
|
|
|
'condition_name': None
|
2017-08-27 21:32:39 +00:00
|
|
|
}
|
2017-07-04 18:06:03 +00:00
|
|
|
|
|
|
|
|
# Weekly seasonality
|
|
|
|
|
weekly_disable = ((last - first < pd.Timedelta(weeks=2)) or
|
2017-07-11 23:51:29 +00:00
|
|
|
(min_dt >= pd.Timedelta(weeks=1)))
|
2017-07-04 18:06:03 +00:00
|
|
|
fourier_order = self.parse_seasonality_args(
|
|
|
|
|
'weekly', self.weekly_seasonality, weekly_disable, 3)
|
|
|
|
|
if fourier_order > 0:
|
2017-08-27 21:32:39 +00:00
|
|
|
self.seasonalities['weekly'] = {
|
|
|
|
|
'period': 7,
|
|
|
|
|
'fourier_order': fourier_order,
|
|
|
|
|
'prior_scale': self.seasonality_prior_scale,
|
2018-05-09 19:25:29 +00:00
|
|
|
'mode': self.seasonality_mode,
|
2019-03-18 17:04:35 +00:00
|
|
|
'condition_name': None
|
2017-08-27 21:32:39 +00:00
|
|
|
}
|
2017-07-04 18:06:03 +00:00
|
|
|
|
|
|
|
|
# Daily seasonality
|
|
|
|
|
daily_disable = ((last - first < pd.Timedelta(days=2)) or
|
2017-07-11 23:51:29 +00:00
|
|
|
(min_dt >= pd.Timedelta(days=1)))
|
2017-07-04 18:06:03 +00:00
|
|
|
fourier_order = self.parse_seasonality_args(
|
|
|
|
|
'daily', self.daily_seasonality, daily_disable, 4)
|
|
|
|
|
if fourier_order > 0:
|
2017-08-27 21:32:39 +00:00
|
|
|
self.seasonalities['daily'] = {
|
|
|
|
|
'period': 1,
|
|
|
|
|
'fourier_order': fourier_order,
|
|
|
|
|
'prior_scale': self.seasonality_prior_scale,
|
2018-05-09 19:25:29 +00:00
|
|
|
'mode': self.seasonality_mode,
|
2019-03-18 17:04:35 +00:00
|
|
|
'condition_name': None
|
2017-08-27 21:32:39 +00:00
|
|
|
}
|
2017-04-13 08:25:03 +00:00
|
|
|
|
2017-02-22 23:59:43 +00:00
|
|
|
@staticmethod
|
|
|
|
|
def linear_growth_init(df):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Initialize linear growth.
|
|
|
|
|
|
|
|
|
|
Provides a strong initialization for linear growth by calculating the
|
|
|
|
|
growth and offset parameters that pass the function through the first
|
|
|
|
|
and last points in the time series.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: pd.DataFrame with columns ds (date), y_scaled (scaled time series),
|
|
|
|
|
and t (scaled time).
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
A tuple (k, m) with the rate (k) and offset (m) of the linear growth
|
|
|
|
|
function.
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
i0, i1 = df['ds'].idxmin(), df['ds'].idxmax()
|
2017-08-20 04:26:59 +00:00
|
|
|
T = df['t'].iloc[i1] - df['t'].iloc[i0]
|
|
|
|
|
k = (df['y_scaled'].iloc[i1] - df['y_scaled'].iloc[i0]) / T
|
|
|
|
|
m = df['y_scaled'].iloc[i0] - k * df['t'].iloc[i0]
|
2017-02-22 23:59:43 +00:00
|
|
|
return (k, m)
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def logistic_growth_init(df):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Initialize logistic growth.
|
|
|
|
|
|
|
|
|
|
Provides a strong initialization for logistic growth by calculating the
|
|
|
|
|
growth and offset parameters that pass the function through the first
|
|
|
|
|
and last points in the time series.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: pd.DataFrame with columns ds (date), cap_scaled (scaled capacity),
|
|
|
|
|
y_scaled (scaled time series), and t (scaled time).
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
A tuple (k, m) with the rate (k) and offset (m) of the logistic growth
|
|
|
|
|
function.
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
i0, i1 = df['ds'].idxmin(), df['ds'].idxmax()
|
2017-08-20 04:26:59 +00:00
|
|
|
T = df['t'].iloc[i1] - df['t'].iloc[i0]
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2017-08-28 16:06:00 +00:00
|
|
|
# Force valid values, in case y > cap or y < 0
|
|
|
|
|
C0 = df['cap_scaled'].iloc[i0]
|
|
|
|
|
C1 = df['cap_scaled'].iloc[i1]
|
|
|
|
|
y0 = max(0.01 * C0, min(0.99 * C0, df['y_scaled'].iloc[i0]))
|
|
|
|
|
y1 = max(0.01 * C1, min(0.99 * C1, df['y_scaled'].iloc[i1]))
|
|
|
|
|
|
|
|
|
|
r0 = C0 / y0
|
|
|
|
|
r1 = C1 / y1
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
if abs(r0 - r1) <= 0.01:
|
|
|
|
|
r0 = 1.05 * r0
|
|
|
|
|
|
|
|
|
|
L0 = np.log(r0 - 1)
|
|
|
|
|
L1 = np.log(r1 - 1)
|
|
|
|
|
|
|
|
|
|
# Initialize the offset
|
|
|
|
|
m = L0 * T / (L0 - L1)
|
|
|
|
|
# And the rate
|
2017-07-17 18:13:22 +00:00
|
|
|
k = (L0 - L1) / T
|
2017-02-22 23:59:43 +00:00
|
|
|
return (k, m)
|
|
|
|
|
|
2017-02-26 11:24:22 +00:00
|
|
|
    def fit(self, df, **kwargs):
        """Fit the Prophet model.

        This sets self.params to contain the fitted model parameters. It is a
        dictionary with parameter names as keys and the following items:
            k (Mx1 array): M posterior samples of the initial slope.
            m (Mx1 array): The initial intercept.
            delta (MxN array): The slope change at each of N changepoints.
            beta (MxK matrix): Coefficients for K seasonality features.
            sigma_obs (Mx1 array): Noise level.
        Note that M=1 if MAP estimation.

        Parameters
        ----------
        df: pd.DataFrame containing the history. Must have columns ds (date
            type) and y, the time series. If self.growth is 'logistic', then
            df must also have a column cap that specifies the capacity at
            each ds.
        kwargs: Additional arguments passed to the optimizing or sampling
            functions in Stan.

        Returns
        -------
        The fitted Prophet object.
        """
        if self.history is not None:
            raise Exception('Prophet object can only be fit once. '
                            'Instantiate a new object.')
        if ('ds' not in df) or ('y' not in df):
            raise ValueError(
                'Dataframe must have columns "ds" and "y" with the dates and '
                'values respectively.'
            )
        # Rows with missing y are excluded from fitting but kept in
        # history_dates below.
        history = df[df['y'].notnull()].copy()
        if history.shape[0] < 2:
            raise ValueError('Dataframe has less than 2 non-NaN rows.')
        self.history_dates = pd.to_datetime(df['ds']).sort_values()

        # Scales y (and cap) and computes scaled time t.
        history = self.setup_dataframe(history, initialize_scales=True)
        self.history = history
        self.set_auto_seasonalities()
        seasonal_features, prior_scales, component_cols, modes = (
            self.make_all_seasonality_features(history))
        # Remembered so predict-time features can be validated against them.
        self.train_component_cols = component_cols
        self.component_modes = modes

        self.set_changepoints()

        # Data dictionary for the Stan model.
        dat = {
            'T': history.shape[0],
            'K': seasonal_features.shape[1],
            'S': len(self.changepoints_t),
            'y': history['y_scaled'],
            't': history['t'],
            't_change': self.changepoints_t,
            'X': seasonal_features,
            'sigmas': prior_scales,
            'tau': self.changepoint_prior_scale,
            'trend_indicator': int(self.growth == 'logistic'),
            's_a': component_cols['additive_terms'],
            's_m': component_cols['multiplicative_terms'],
        }

        if self.growth == 'linear':
            # The Stan model requires a cap vector even for linear growth.
            dat['cap'] = np.zeros(self.history.shape[0])
            kinit = self.linear_growth_init(history)
        else:
            dat['cap'] = history['cap_scaled']
            kinit = self.logistic_growth_init(history)

        model = prophet_stan_model

        def stan_init():
            # Initial parameter values for Stan: two-point trend init, zeros
            # elsewhere.
            return {
                'k': kinit[0],
                'm': kinit[1],
                'delta': np.zeros(len(self.changepoints_t)),
                'beta': np.zeros(seasonal_features.shape[1]),
                'sigma_obs': 1,
            }

        if (history['y'].min() == history['y'].max()
                and self.growth == 'linear'):
            # Nothing to fit: constant series, so use the init values with a
            # near-zero noise level.
            self.params = stan_init()
            self.params['sigma_obs'] = 1e-9
            for par in self.params:
                self.params[par] = np.array([self.params[par]])
        elif self.mcmc_samples > 0:
            # Full MCMC sampling of the posterior.
            args = dict(
                data=dat,
                init=stan_init,
                iter=self.mcmc_samples,
            )
            args.update(kwargs)
            self.stan_fit = model.sampling(**args)
            for par in self.stan_fit.model_pars:
                self.params[par] = self.stan_fit[par]
                # Shape vector parameters as 2-D (samples x coefficients).
                if (par in ['delta', 'beta']
                        and len(self.params[par].shape) < 2):
                    self.params[par] = self.params[par].reshape((-1, 1))
        else:
            # MAP estimation; Newton is more robust on small datasets.
            args = dict(
                data=dat,
                init=stan_init,
                algorithm='Newton' if dat['T'] < 100 else 'LBFGS',
                iter=1e4,
            )
            args.update(kwargs)
            try:
                self.stan_fit = model.optimizing(**args)
            except RuntimeError:
                logger.warning(
                    'Optimization terminated abnormally. '
                    'Falling back to Newton.'
                )
                args['algorithm'] = 'Newton'
                self.stan_fit = model.optimizing(**args)
            for par in self.stan_fit:
                # MAP gives a single sample: shape everything as (1, n).
                self.params[par] = self.stan_fit[par].reshape((1, -1))

        # If no changepoints were requested, replace delta with 0s
        if len(self.changepoints) == 0:
            # Fold delta into the base rate k
            self.params['k'] = (self.params['k']
                                + self.params['delta'].reshape(-1))
            self.params['delta'] = (np.zeros(self.params['delta'].shape)
                                    .reshape((-1, 1)))

        return self
|
|
|
|
|
|
|
|
|
|
    def predict(self, df=None):
        """Predict using the prophet model.

        Parameters
        ----------
        df: pd.DataFrame with dates for predictions (column ds), and capacity
            (column cap) if logistic growth. If not provided, predictions are
            made on the history.

        Returns
        -------
        A pd.DataFrame with the forecast components.
        """
        if self.history is None:
            raise Exception('Model has not been fit.')

        if df is None:
            df = self.history.copy()
        else:
            if df.shape[0] == 0:
                raise ValueError('Dataframe has no rows.')
            # Applies the scaling fitted during training.
            df = self.setup_dataframe(df.copy())

        df['trend'] = self.predict_trend(df)
        seasonal_components = self.predict_seasonal_components(df)
        if self.uncertainty_samples:
            intervals = self.predict_uncertainty(df)
        else:
            # pd.concat below silently drops None entries, so no intervals
            # are added when uncertainty sampling is disabled.
            intervals = None

        # Drop columns except ds, cap, floor, and trend
        cols = ['ds', 'trend']
        if 'cap' in df:
            cols.append('cap')
        if self.logistic_floor:
            cols.append('floor')
        # Add in forecast components
        df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
        # Multiplicative terms scale the trend; additive terms shift it.
        df2['yhat'] = (
            df2['trend'] * (1 + df2['multiplicative_terms'])
            + df2['additive_terms']
        )
        return df2
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def piecewise_linear(t, deltas, k, m, changepoint_ts):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Evaluate the piecewise linear function.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
t: np.array of times on which the function is evaluated.
|
|
|
|
|
deltas: np.array of rate changes at each changepoint.
|
|
|
|
|
k: Float initial rate.
|
|
|
|
|
m: Float initial offset.
|
|
|
|
|
changepoint_ts: np.array of changepoint times.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Vector y(t).
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
# Intercept changes
|
|
|
|
|
gammas = -changepoint_ts * deltas
|
|
|
|
|
# Get cumulative slope and intercept at each t
|
|
|
|
|
k_t = k * np.ones_like(t)
|
|
|
|
|
m_t = m * np.ones_like(t)
|
|
|
|
|
for s, t_s in enumerate(changepoint_ts):
|
|
|
|
|
indx = t >= t_s
|
|
|
|
|
k_t[indx] += deltas[s]
|
|
|
|
|
m_t[indx] += gammas[s]
|
|
|
|
|
return k_t * t + m_t
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Evaluate the piecewise logistic function.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
t: np.array of times on which the function is evaluated.
|
|
|
|
|
cap: np.array of capacities at each t.
|
|
|
|
|
deltas: np.array of rate changes at each changepoint.
|
|
|
|
|
k: Float initial rate.
|
|
|
|
|
m: Float initial offset.
|
|
|
|
|
changepoint_ts: np.array of changepoint times.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Vector y(t).
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
# Compute offset changes
|
|
|
|
|
k_cum = np.concatenate((np.atleast_1d(k), np.cumsum(deltas) + k))
|
|
|
|
|
gammas = np.zeros(len(changepoint_ts))
|
|
|
|
|
for i, t_s in enumerate(changepoint_ts):
|
|
|
|
|
gammas[i] = (
|
2018-08-27 20:52:34 +00:00
|
|
|
(t_s - m - np.sum(gammas))
|
|
|
|
|
* (1 - k_cum[i] / k_cum[i + 1]) # noqa W503
|
2017-02-22 23:59:43 +00:00
|
|
|
)
|
|
|
|
|
# Get cumulative rate and offset at each t
|
|
|
|
|
k_t = k * np.ones_like(t)
|
|
|
|
|
m_t = m * np.ones_like(t)
|
|
|
|
|
for s, t_s in enumerate(changepoint_ts):
|
|
|
|
|
indx = t >= t_s
|
|
|
|
|
k_t[indx] += deltas[s]
|
|
|
|
|
m_t[indx] += gammas[s]
|
|
|
|
|
return cap / (1 + np.exp(-k_t * (t - m_t)))
|
|
|
|
|
|
|
|
|
|
def predict_trend(self, df):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Predict trend using the prophet model.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Vector with trend on prediction dates.
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
k = np.nanmean(self.params['k'])
|
|
|
|
|
m = np.nanmean(self.params['m'])
|
|
|
|
|
deltas = np.nanmean(self.params['delta'], axis=0)
|
|
|
|
|
|
|
|
|
|
t = np.array(df['t'])
|
|
|
|
|
if self.growth == 'linear':
|
2017-02-28 08:08:37 +00:00
|
|
|
trend = self.piecewise_linear(t, deltas, k, m, self.changepoints_t)
|
2017-02-22 23:59:43 +00:00
|
|
|
else:
|
|
|
|
|
cap = df['cap_scaled']
|
2017-02-28 08:08:37 +00:00
|
|
|
trend = self.piecewise_logistic(
|
|
|
|
|
t, cap, deltas, k, m, self.changepoints_t)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2017-08-28 16:06:00 +00:00
|
|
|
return trend * self.y_scale + df['floor']
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
def predict_seasonal_components(self, df):
|
2017-07-21 14:05:16 +00:00
|
|
|
"""Predict seasonality components, holidays, and added regressors.
|
2017-03-23 15:27:44 +00:00
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Dataframe with seasonal components.
|
|
|
|
|
"""
|
2018-05-12 00:54:29 +00:00
|
|
|
seasonal_features, _, component_cols, _ = (
|
2018-05-08 00:09:02 +00:00
|
|
|
self.make_all_seasonality_features(df)
|
|
|
|
|
)
|
2019-07-23 08:59:28 +00:00
|
|
|
if self.uncertainty_samples:
|
|
|
|
|
lower_p = 100 * (1.0 - self.interval_width) / 2
|
|
|
|
|
upper_p = 100 * (1.0 + self.interval_width) / 2
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-05-25 22:44:05 +00:00
|
|
|
X = seasonal_features.values
|
2018-05-08 00:09:02 +00:00
|
|
|
data = {}
|
|
|
|
|
for component in component_cols.columns:
|
|
|
|
|
beta_c = self.params['beta'] * component_cols[component].values
|
|
|
|
|
|
2018-05-09 19:25:29 +00:00
|
|
|
comp = np.matmul(X, beta_c.transpose())
|
2018-05-12 00:54:29 +00:00
|
|
|
if component in self.component_modes['additive']:
|
2018-08-27 20:52:34 +00:00
|
|
|
comp *= self.y_scale
|
2018-05-08 00:09:02 +00:00
|
|
|
data[component] = np.nanmean(comp, axis=1)
|
2019-07-23 08:59:28 +00:00
|
|
|
if self.uncertainty_samples:
|
|
|
|
|
data[component + '_lower'] = np.nanpercentile(
|
|
|
|
|
comp, lower_p, axis=1,
|
|
|
|
|
)
|
|
|
|
|
data[component + '_upper'] = np.nanpercentile(
|
|
|
|
|
comp, upper_p, axis=1,
|
|
|
|
|
)
|
2018-05-08 00:09:02 +00:00
|
|
|
return pd.DataFrame(data)
|
|
|
|
|
|
2017-07-05 01:27:57 +00:00
|
|
|
def sample_posterior_predictive(self, df):
|
|
|
|
|
"""Prophet posterior predictive samples.
|
2017-03-23 15:27:44 +00:00
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2018-05-09 19:25:29 +00:00
|
|
|
Dictionary with posterior predictive samples for the forecast yhat and
|
|
|
|
|
for the trend component.
|
2017-03-23 15:27:44 +00:00
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
n_iterations = self.params['k'].shape[0]
|
|
|
|
|
samp_per_iter = max(1, int(np.ceil(
|
|
|
|
|
self.uncertainty_samples / float(n_iterations)
|
|
|
|
|
)))
|
|
|
|
|
|
|
|
|
|
# Generate seasonality features once so we can re-use them.
|
2018-05-09 19:25:29 +00:00
|
|
|
seasonal_features, _, component_cols, _ = (
|
|
|
|
|
self.make_all_seasonality_features(df)
|
|
|
|
|
)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2018-05-09 19:25:29 +00:00
|
|
|
sim_values = {'yhat': [], 'trend': []}
|
2017-02-22 23:59:43 +00:00
|
|
|
for i in range(n_iterations):
|
2017-04-06 18:47:03 +00:00
|
|
|
for _j in range(samp_per_iter):
|
2018-05-09 19:25:29 +00:00
|
|
|
sim = self.sample_model(
|
|
|
|
|
df=df,
|
|
|
|
|
seasonal_features=seasonal_features,
|
|
|
|
|
iteration=i,
|
|
|
|
|
s_a=component_cols['additive_terms'],
|
|
|
|
|
s_m=component_cols['multiplicative_terms'],
|
|
|
|
|
)
|
2017-02-22 23:59:43 +00:00
|
|
|
for key in sim_values:
|
|
|
|
|
sim_values[key].append(sim[key])
|
2017-07-05 01:27:57 +00:00
|
|
|
for k, v in sim_values.items():
|
|
|
|
|
sim_values[k] = np.column_stack(v)
|
|
|
|
|
return sim_values
|
|
|
|
|
|
|
|
|
|
def predictive_samples(self, df):
|
|
|
|
|
"""Sample from the posterior predictive distribution.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Dataframe with dates for predictions (column ds), and capacity
|
|
|
|
|
(column cap) if logistic growth.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2018-05-09 19:25:29 +00:00
|
|
|
Dictionary with keys "trend" and "yhat" containing
|
|
|
|
|
posterior predictive samples for that component.
|
2017-07-05 01:27:57 +00:00
|
|
|
"""
|
2017-07-21 14:05:16 +00:00
|
|
|
df = self.setup_dataframe(df.copy())
|
2017-07-05 01:27:57 +00:00
|
|
|
sim_values = self.sample_posterior_predictive(df)
|
|
|
|
|
return sim_values
|
|
|
|
|
|
|
|
|
|
def predict_uncertainty(self, df):
|
2017-07-21 14:05:16 +00:00
|
|
|
"""Prediction intervals for yhat and trend.
|
2017-07-05 01:27:57 +00:00
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
Dataframe with uncertainty intervals.
|
|
|
|
|
"""
|
|
|
|
|
sim_values = self.sample_posterior_predictive(df)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
lower_p = 100 * (1.0 - self.interval_width) / 2
|
|
|
|
|
upper_p = 100 * (1.0 + self.interval_width) / 2
|
|
|
|
|
|
|
|
|
|
series = {}
|
2017-07-21 14:05:16 +00:00
|
|
|
for key in ['yhat', 'trend']:
|
|
|
|
|
series['{}_lower'.format(key)] = np.nanpercentile(
|
|
|
|
|
sim_values[key], lower_p, axis=1)
|
|
|
|
|
series['{}_upper'.format(key)] = np.nanpercentile(
|
|
|
|
|
sim_values[key], upper_p, axis=1)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
return pd.DataFrame(series)
|
|
|
|
|
|
2018-05-09 19:25:29 +00:00
|
|
|
def sample_model(self, df, seasonal_features, iteration, s_a, s_m):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Simulate observations from the extrapolated generative model.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
seasonal_features: pd.DataFrame of seasonal features.
|
|
|
|
|
iteration: Int sampling iteration to use parameters from.
|
2018-05-15 17:07:38 +00:00
|
|
|
s_a: Indicator vector for additive components
|
|
|
|
|
s_m: Indicator vector for multiplicative components
|
2017-03-23 15:27:44 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2018-05-09 19:25:29 +00:00
|
|
|
Dataframe with trend and yhat, each like df['t'].
|
2017-03-23 15:27:44 +00:00
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
trend = self.sample_predictive_trend(df, iteration)
|
|
|
|
|
|
|
|
|
|
beta = self.params['beta'][iteration]
|
2019-11-19 16:26:12 +00:00
|
|
|
Xb_a = np.matmul(seasonal_features.values,
|
|
|
|
|
beta * s_a.values) * self.y_scale
|
2019-01-15 20:47:54 +00:00
|
|
|
Xb_m = np.matmul(seasonal_features.values, beta * s_m.values)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
sigma = self.params['sigma_obs'][iteration]
|
|
|
|
|
noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale
|
|
|
|
|
|
|
|
|
|
return pd.DataFrame({
|
2018-05-09 19:25:29 +00:00
|
|
|
'yhat': trend * (1 + Xb_m) + Xb_a + noise,
|
|
|
|
|
'trend': trend
|
2017-02-22 23:59:43 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
|
|
def sample_predictive_trend(self, df, iteration):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Simulate the trend using the extrapolated generative model.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
df: Prediction dataframe.
|
|
|
|
|
iteration: Int sampling iteration to use parameters from.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
np.array of simulated trend over df['t'].
|
|
|
|
|
"""
|
2017-02-22 23:59:43 +00:00
|
|
|
k = self.params['k'][iteration]
|
|
|
|
|
m = self.params['m'][iteration]
|
|
|
|
|
deltas = self.params['delta'][iteration]
|
|
|
|
|
|
|
|
|
|
t = np.array(df['t'])
|
|
|
|
|
T = t.max()
|
|
|
|
|
|
2018-05-30 04:47:11 +00:00
|
|
|
# New changepoints from a Poisson process with rate S on [1, T]
|
2017-02-22 23:59:43 +00:00
|
|
|
if T > 1:
|
2017-02-28 08:08:37 +00:00
|
|
|
S = len(self.changepoints_t)
|
2018-05-30 04:47:11 +00:00
|
|
|
n_changes = np.random.poisson(S * (T - 1))
|
2017-02-22 23:59:43 +00:00
|
|
|
else:
|
|
|
|
|
n_changes = 0
|
2018-05-30 04:47:11 +00:00
|
|
|
if n_changes > 0:
|
|
|
|
|
changepoint_ts_new = 1 + np.random.rand(n_changes) * (T - 1)
|
|
|
|
|
changepoint_ts_new.sort()
|
|
|
|
|
else:
|
|
|
|
|
changepoint_ts_new = []
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
# Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
|
|
|
|
|
lambda_ = np.mean(np.abs(deltas)) + 1e-8
|
|
|
|
|
|
|
|
|
|
# Sample deltas
|
|
|
|
|
deltas_new = np.random.laplace(0, lambda_, n_changes)
|
|
|
|
|
|
|
|
|
|
# Prepend the times and deltas from the history
|
2017-02-28 08:08:37 +00:00
|
|
|
changepoint_ts = np.concatenate((self.changepoints_t,
|
|
|
|
|
changepoint_ts_new))
|
2017-02-22 23:59:43 +00:00
|
|
|
deltas = np.concatenate((deltas, deltas_new))
|
|
|
|
|
|
|
|
|
|
if self.growth == 'linear':
|
|
|
|
|
trend = self.piecewise_linear(t, deltas, k, m, changepoint_ts)
|
|
|
|
|
else:
|
|
|
|
|
cap = df['cap_scaled']
|
|
|
|
|
trend = self.piecewise_logistic(t, cap, deltas, k, m,
|
|
|
|
|
changepoint_ts)
|
|
|
|
|
|
2017-08-28 16:06:00 +00:00
|
|
|
return trend * self.y_scale + df['floor']
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
def make_future_dataframe(self, periods, freq='D', include_history=True):
|
2017-03-23 15:27:44 +00:00
|
|
|
"""Simulate the trend using the extrapolated generative model.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
periods: Int number of periods to forecast forward.
|
|
|
|
|
freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
|
|
|
|
|
include_history: Boolean to include the historical dates in the data
|
|
|
|
|
frame for predictions.
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
|
|
|
|
pd.Dataframe that extends forward from the end of self.history for the
|
|
|
|
|
requested number of periods.
|
|
|
|
|
"""
|
2018-04-21 01:48:21 +00:00
|
|
|
if self.history_dates is None:
|
2019-11-19 17:35:49 +00:00
|
|
|
raise Exception('Model has not been fit.')
|
2017-03-23 13:47:29 +00:00
|
|
|
last_date = self.history_dates.max()
|
2017-02-22 23:59:43 +00:00
|
|
|
dates = pd.date_range(
|
|
|
|
|
start=last_date,
|
2017-03-12 12:29:20 +00:00
|
|
|
periods=periods + 1, # An extra in case we include start
|
|
|
|
|
freq=freq)
|
|
|
|
|
dates = dates[dates > last_date] # Drop start if equals last_date
|
|
|
|
|
dates = dates[:periods] # Return correct number of periods
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
if include_history:
|
2017-03-23 13:47:29 +00:00
|
|
|
dates = np.concatenate((np.array(self.history_dates), dates))
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
return pd.DataFrame({'ds': dates})
|
|
|
|
|
|
2019-11-19 16:26:12 +00:00
|
|
|
def plot(self, fcst, ax=None, uncertainty=True, plot_cap=True,
|
|
|
|
|
xlabel='ds', ylabel='y', figsize=(10, 6)):
|
2017-02-22 23:59:43 +00:00
|
|
|
"""Plot the Prophet forecast.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
fcst: pd.DataFrame output of self.predict.
|
2017-04-17 19:45:44 +00:00
|
|
|
ax: Optional matplotlib axes on which to plot.
|
2017-02-22 23:59:43 +00:00
|
|
|
uncertainty: Optional boolean to plot uncertainty intervals.
|
2017-04-11 05:48:43 +00:00
|
|
|
plot_cap: Optional boolean indicating if the capacity should be shown
|
|
|
|
|
in the figure, if available.
|
2017-02-26 13:38:10 +00:00
|
|
|
xlabel: Optional label name on X-axis
|
|
|
|
|
ylabel: Optional label name on Y-axis
|
2019-05-15 12:36:53 +00:00
|
|
|
figsize: Optional tuple width, height in inches.
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2017-04-17 19:45:44 +00:00
|
|
|
A matplotlib figure.
|
2017-02-22 23:59:43 +00:00
|
|
|
"""
|
2018-05-03 17:23:56 +00:00
|
|
|
return plot(
|
|
|
|
|
m=self, fcst=fcst, ax=ax, uncertainty=uncertainty,
|
|
|
|
|
plot_cap=plot_cap, xlabel=xlabel, ylabel=ylabel,
|
2019-05-15 12:36:53 +00:00
|
|
|
figsize=figsize
|
2018-05-03 17:23:56 +00:00
|
|
|
)
|
2017-02-22 23:59:43 +00:00
|
|
|
|
2017-04-13 08:51:17 +00:00
|
|
|
def plot_components(self, fcst, uncertainty=True, plot_cap=True,
|
2019-05-15 12:36:53 +00:00
|
|
|
weekly_start=0, yearly_start=0, figsize=None):
|
2017-02-22 23:59:43 +00:00
|
|
|
"""Plot the Prophet forecast components.
|
|
|
|
|
|
|
|
|
|
Will plot whichever are available of: trend, holidays, weekly
|
|
|
|
|
seasonality, and yearly seasonality.
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
----------
|
|
|
|
|
fcst: pd.DataFrame output of self.predict.
|
|
|
|
|
uncertainty: Optional boolean to plot uncertainty intervals.
|
2017-04-11 05:48:43 +00:00
|
|
|
plot_cap: Optional boolean indicating if the capacity should be shown
|
|
|
|
|
in the figure, if available.
|
2017-04-13 08:51:17 +00:00
|
|
|
weekly_start: Optional int specifying the start day of the weekly
|
|
|
|
|
seasonality plot. 0 (default) starts the week on Sunday. 1 shifts
|
|
|
|
|
by 1 day to Monday, and so on.
|
2017-04-13 09:08:34 +00:00
|
|
|
yearly_start: Optional int specifying the start day of the yearly
|
|
|
|
|
seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts
|
|
|
|
|
by 1 day to Jan 2, and so on.
|
2019-05-15 12:36:53 +00:00
|
|
|
figsize: Optional tuple width, height in inches.
|
2017-02-22 23:59:43 +00:00
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
-------
|
2017-04-17 19:45:44 +00:00
|
|
|
A matplotlib figure.
|
2017-02-22 23:59:43 +00:00
|
|
|
"""
|
2018-05-03 17:23:56 +00:00
|
|
|
return plot_components(
|
|
|
|
|
m=self, fcst=fcst, uncertainty=uncertainty, plot_cap=plot_cap,
|
|
|
|
|
weekly_start=weekly_start, yearly_start=yearly_start,
|
2019-05-15 12:36:53 +00:00
|
|
|
figsize=figsize
|
2018-05-03 17:23:56 +00:00
|
|
|
)
|