mirror of
https://github.com/saymrwulf/prophet.git
synced 2026-05-31 23:27:52 +00:00
Feature Proposal with Python Draft: Masked seasonality (#873)
* Added feature to mask seasonality * Added conditional seasonality tests and fixed plotting it * Fixed diagnostics for conditional seasonality and cleaned up code
This commit is contained in:
parent
eec2f46ac5
commit
8eea5a1ca3
5 changed files with 104 additions and 24 deletions
|
|
@ -110,6 +110,10 @@ def cross_validation(model, horizon, period=None, initial=None):
|
|||
if m.logistic_floor:
|
||||
columns.append('floor')
|
||||
columns.extend(m.extra_regressors.keys())
|
||||
columns.extend([
|
||||
props['condition_name']
|
||||
for props in m.seasonalities.values()
|
||||
if props['condition_name'] is not None])
|
||||
yhat = m.predict(df[index_predicted][columns])
|
||||
# Merge yhat(predicts), y(df, original data) and cutoff
|
||||
predicts.append(pd.concat([
|
||||
|
|
|
|||
|
|
@ -246,6 +246,15 @@ class Prophet(object):
|
|||
df[name] = pd.to_numeric(df[name])
|
||||
if df[name].isnull().any():
|
||||
raise ValueError('Found NaN in column ' + name)
|
||||
for props in self.seasonalities.values():
|
||||
condition_name = props['condition_name']
|
||||
if condition_name is not None:
|
||||
if condition_name not in df:
|
||||
raise ValueError(
|
||||
'Condition "{}" missing from dataframe'.format(condition_name))
|
||||
if not df[condition_name].isin([True, False]).all():
|
||||
raise ValueError('Found non-boolean in column ' + condition_name)
|
||||
df[condition_name] = df[condition_name].astype('bool')
|
||||
|
||||
df = df.sort_values('ds')
|
||||
df.reset_index(inplace=True, drop=True)
|
||||
|
|
@ -581,7 +590,7 @@ class Prophet(object):
|
|||
return self
|
||||
|
||||
def add_seasonality(
|
||||
self, name, period, fourier_order, prior_scale=None, mode=None
|
||||
self, name, period, fourier_order, prior_scale=None, mode=None, condition_name=None
|
||||
):
|
||||
"""Add a seasonal component with specified period, number of Fourier
|
||||
components, and prior scale.
|
||||
|
|
@ -600,6 +609,10 @@ class Prophet(object):
|
|||
Additive means the seasonality will be added to the trend,
|
||||
multiplicative means it will multiply the trend.
|
||||
|
||||
If condition_name is provided, the dataframe passed to `fit` and `predict`
|
||||
should have a column with the specified condition_name containing booleans
|
||||
that decide when to apply the seasonality.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: string name of the seasonality component.
|
||||
|
|
@ -607,6 +620,7 @@ class Prophet(object):
|
|||
fourier_order: int number of Fourier components to use.
|
||||
prior_scale: optional float prior scale for this component.
|
||||
mode: optional 'additive' or 'multiplicative'
|
||||
condition_name: optional string name of the boolean dataframe column that switches this seasonality on.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -628,11 +642,14 @@ class Prophet(object):
|
|||
mode = self.seasonality_mode
|
||||
if mode not in ['additive', 'multiplicative']:
|
||||
raise ValueError("mode must be 'additive' or 'multiplicative'")
|
||||
if condition_name is not None:
|
||||
self.validate_column_name(condition_name)
|
||||
self.seasonalities[name] = {
|
||||
'period': period,
|
||||
'fourier_order': fourier_order,
|
||||
'prior_scale': ps,
|
||||
'mode': mode,
|
||||
'condition_name': condition_name,
|
||||
}
|
||||
return self
|
||||
|
||||
|
|
@ -705,6 +722,8 @@ class Prophet(object):
|
|||
props['fourier_order'],
|
||||
name,
|
||||
)
|
||||
if props['condition_name'] is not None:
|
||||
features[~df[props['condition_name']]] = 0
|
||||
seasonal_features.append(features)
|
||||
prior_scales.extend(
|
||||
[props['prior_scale']] * features.shape[1])
|
||||
|
|
@ -889,6 +908,7 @@ class Prophet(object):
|
|||
'fourier_order': fourier_order,
|
||||
'prior_scale': self.seasonality_prior_scale,
|
||||
'mode': self.seasonality_mode,
|
||||
'condition_name': None
|
||||
}
|
||||
|
||||
# Weekly seasonality
|
||||
|
|
@ -902,6 +922,7 @@ class Prophet(object):
|
|||
'fourier_order': fourier_order,
|
||||
'prior_scale': self.seasonality_prior_scale,
|
||||
'mode': self.seasonality_mode,
|
||||
'condition_name': None
|
||||
}
|
||||
|
||||
# Daily seasonality
|
||||
|
|
@ -915,6 +936,7 @@ class Prophet(object):
|
|||
'fourier_order': fourier_order,
|
||||
'prior_scale': self.seasonality_prior_scale,
|
||||
'mode': self.seasonality_mode,
|
||||
'condition_name': None
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -136,14 +136,19 @@ def plot_components(
|
|||
m=m, fcst=fcst, name='trend', ax=ax, uncertainty=uncertainty,
|
||||
plot_cap=plot_cap,
|
||||
)
|
||||
elif plot_name == 'weekly':
|
||||
plot_weekly(
|
||||
m=m, ax=ax, uncertainty=uncertainty, weekly_start=weekly_start,
|
||||
)
|
||||
elif plot_name == 'yearly':
|
||||
plot_yearly(
|
||||
m=m, ax=ax, uncertainty=uncertainty, yearly_start=yearly_start,
|
||||
)
|
||||
elif plot_name in m.seasonalities:
|
||||
if plot_name == 'weekly' or m.seasonalities[plot_name]['period'] == 7:
|
||||
plot_weekly(
|
||||
m=m, name=plot_name, ax=ax, uncertainty=uncertainty, weekly_start=weekly_start
|
||||
)
|
||||
elif plot_name == 'yearly' or m.seasonalities[plot_name]['period'] == 365.25:
|
||||
plot_yearly(
|
||||
m=m, name=plot_name, ax=ax, uncertainty=uncertainty, yearly_start=yearly_start
|
||||
)
|
||||
else:
|
||||
plot_seasonality(
|
||||
m=m, name=plot_name, ax=ax, uncertainty=uncertainty,
|
||||
)
|
||||
elif plot_name in [
|
||||
'holidays',
|
||||
'extra_regressors_additive',
|
||||
|
|
@ -153,10 +158,6 @@ def plot_components(
|
|||
m=m, fcst=fcst, name=plot_name, ax=ax, uncertainty=uncertainty,
|
||||
plot_cap=False,
|
||||
)
|
||||
else:
|
||||
plot_seasonality(
|
||||
m=m, name=plot_name, ax=ax, uncertainty=uncertainty,
|
||||
)
|
||||
if plot_name in m.component_modes['multiplicative']:
|
||||
multiplicative_axes.append(ax)
|
||||
|
||||
|
|
@ -224,12 +225,16 @@ def seasonality_plot_df(m, ds):
|
|||
df_dict = {'ds': ds, 'cap': 1., 'floor': 0.}
|
||||
for name in m.extra_regressors:
|
||||
df_dict[name] = 0.
|
||||
# Activate all conditional seasonality columns
|
||||
for props in m.seasonalities.values():
|
||||
if props['condition_name'] is not None:
|
||||
df_dict[props['condition_name']] = True
|
||||
df = pd.DataFrame(df_dict)
|
||||
df = m.setup_dataframe(df)
|
||||
return df
|
||||
|
||||
|
||||
def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
|
||||
def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6), name='weekly'):
|
||||
"""Plot the weekly component of the forecast.
|
||||
|
||||
Parameters
|
||||
|
|
@ -242,6 +247,7 @@ def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
|
|||
seasonality plot. 0 (default) starts the week on Sunday. 1 shifts
|
||||
by 1 day to Monday, and so on.
|
||||
figsize: Optional tuple width, height in inches.
|
||||
name: Name of seasonality component if changed from default 'weekly'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -257,23 +263,23 @@ def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
|
|||
df_w = seasonality_plot_df(m, days)
|
||||
seas = m.predict_seasonal_components(df_w)
|
||||
days = days.weekday_name
|
||||
artists += ax.plot(range(len(days)), seas['weekly'], ls='-',
|
||||
artists += ax.plot(range(len(days)), seas[name], ls='-',
|
||||
c='#0072B2')
|
||||
if uncertainty:
|
||||
artists += [ax.fill_between(range(len(days)),
|
||||
seas['weekly_lower'], seas['weekly_upper'],
|
||||
seas[name + '_lower'], seas[name + '_upper'],
|
||||
color='#0072B2', alpha=0.2)]
|
||||
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
|
||||
ax.set_xticks(range(len(days)))
|
||||
ax.set_xticklabels(days)
|
||||
ax.set_xlabel('Day of week')
|
||||
ax.set_ylabel('weekly')
|
||||
if m.seasonalities['weekly']['mode'] == 'multiplicative':
|
||||
ax.set_ylabel(name)
|
||||
if m.seasonalities[name]['mode'] == 'multiplicative':
|
||||
ax = set_y_as_percent(ax)
|
||||
return artists
|
||||
|
||||
|
||||
def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
|
||||
def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6), name='yearly'):
|
||||
"""Plot the yearly component of the forecast.
|
||||
|
||||
Parameters
|
||||
|
|
@ -286,6 +292,7 @@ def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
|
|||
seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts
|
||||
by 1 day to Jan 2, and so on.
|
||||
figsize: Optional tuple width, height in inches.
|
||||
name: Name of seasonality component if previously changed from default 'yearly'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -301,19 +308,19 @@ def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
|
|||
df_y = seasonality_plot_df(m, days)
|
||||
seas = m.predict_seasonal_components(df_y)
|
||||
artists += ax.plot(
|
||||
df_y['ds'].dt.to_pydatetime(), seas['yearly'], ls='-', c='#0072B2')
|
||||
df_y['ds'].dt.to_pydatetime(), seas[name], ls='-', c='#0072B2')
|
||||
if uncertainty:
|
||||
artists += [ax.fill_between(
|
||||
df_y['ds'].dt.to_pydatetime(), seas['yearly_lower'],
|
||||
seas['yearly_upper'], color='#0072B2', alpha=0.2)]
|
||||
df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
|
||||
seas[name + '_upper'], color='#0072B2', alpha=0.2)]
|
||||
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
|
||||
months = MonthLocator(range(1, 13), bymonthday=1, interval=2)
|
||||
ax.xaxis.set_major_formatter(FuncFormatter(
|
||||
lambda x, pos=None: '{dt:%B} {dt.day}'.format(dt=num2date(x))))
|
||||
ax.xaxis.set_major_locator(months)
|
||||
ax.set_xlabel('Day of year')
|
||||
ax.set_ylabel('yearly')
|
||||
if m.seasonalities['yearly']['mode'] == 'multiplicative':
|
||||
ax.set_ylabel(name)
|
||||
if m.seasonalities[name]['mode'] == 'multiplicative':
|
||||
ax = set_y_as_percent(ax)
|
||||
return artists
|
||||
|
||||
|
|
|
|||
|
|
@ -75,8 +75,11 @@ class TestDiagnostics(TestCase):
|
|||
def test_cross_validation_extra_regressors(self):
|
||||
df = self.__df.copy()
|
||||
df['extra'] = range(df.shape[0])
|
||||
df['is_conditional_week'] = np.arange(df.shape[0]) // 7 % 2
|
||||
m = Prophet()
|
||||
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
|
||||
m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
|
||||
prior_scale=2., condition_name='is_conditional_week')
|
||||
m.add_regressor('extra')
|
||||
m.fit(df)
|
||||
df_cv = diagnostics.cross_validation(
|
||||
|
|
|
|||
|
|
@ -416,6 +416,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 3,
|
||||
'prior_scale': 10.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
# Should be disabled due to too short history
|
||||
|
|
@ -441,6 +442,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 2,
|
||||
'prior_scale': 3.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -457,6 +459,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 10,
|
||||
'prior_scale': 10.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
# Should be disabled due to too short history
|
||||
|
|
@ -477,6 +480,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 7,
|
||||
'prior_scale': 3.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -493,6 +497,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 4,
|
||||
'prior_scale': 10.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
# Should be disabled due to too short history
|
||||
|
|
@ -513,6 +518,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 7,
|
||||
'prior_scale': 3.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
m = Prophet()
|
||||
|
|
@ -545,6 +551,7 @@ class TestProphet(TestCase):
|
|||
'fourier_order': 5,
|
||||
'prior_scale': 2.,
|
||||
'mode': 'additive',
|
||||
'condition_name': None
|
||||
},
|
||||
)
|
||||
with self.assertRaises(ValueError):
|
||||
|
|
@ -580,6 +587,43 @@ class TestProphet(TestCase):
|
|||
self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
|
||||
self.assertEqual(prior_scales, true)
|
||||
|
||||
def test_conditional_custom_seasonality(self):
|
||||
m = Prophet(weekly_seasonality=False, yearly_seasonality=False)
|
||||
m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
|
||||
prior_scale=2., condition_name='is_conditional_week')
|
||||
m.add_seasonality(name='normal_monthly', period=30.5, fourier_order=5,
|
||||
prior_scale=2.)
|
||||
df = DATA.copy()
|
||||
with self.assertRaises(ValueError):
|
||||
# Require all condition names in df
|
||||
m.fit(df)
|
||||
df['is_conditional_week'] = [0] * 255 + [2] * 255
|
||||
with self.assertRaises(ValueError):
|
||||
# Require boolean compatible values
|
||||
m.fit(df)
|
||||
df['is_conditional_week'] = [0] * 255 + [1] * 255
|
||||
m.fit(df)
|
||||
self.assertEqual(
|
||||
m.seasonalities['conditional_weekly'],
|
||||
{
|
||||
'period': 7,
|
||||
'fourier_order': 3,
|
||||
'prior_scale': 2.,
|
||||
'mode': 'additive',
|
||||
'condition_name': 'is_conditional_week'
|
||||
},
|
||||
)
|
||||
self.assertIsNone(m.seasonalities['normal_monthly']['condition_name'])
|
||||
seasonal_features, prior_scales, component_cols, modes = (
|
||||
m.make_all_seasonality_features(m.history)
|
||||
)
|
||||
# Confirm that only rows where is_conditional_week is set have non-zero entries
|
||||
conditional_weekly_columns = seasonal_features.columns[
|
||||
seasonal_features.columns.str.startswith('conditional_weekly')]
|
||||
self.assertTrue(np.array_equal((seasonal_features[conditional_weekly_columns] != 0).any(axis=1).values,
|
||||
df['is_conditional_week'].values))
|
||||
|
||||
|
||||
def test_added_regressors(self):
|
||||
m = Prophet()
|
||||
m.add_regressor('binary_feature', prior_scale=0.2)
|
||||
|
|
|
|||
Loading…
Reference in a new issue