Feature Proposal with Python Draft: Masked seasonality (#873)

* Added feature to mask seasonality

* Added conditional seasonality tests and fixed plotting it

* Fixed diagnostics for conditional seasonality and cleaned up code
This commit is contained in:
Olof Höjvall 2019-03-18 18:04:35 +01:00 committed by Ben Letham
parent eec2f46ac5
commit 8eea5a1ca3
5 changed files with 104 additions and 24 deletions

View file

@ -110,6 +110,10 @@ def cross_validation(model, horizon, period=None, initial=None):
if m.logistic_floor:
columns.append('floor')
columns.extend(m.extra_regressors.keys())
columns.extend([
props['condition_name']
for props in m.seasonalities.values()
if props['condition_name'] is not None])
yhat = m.predict(df[index_predicted][columns])
# Merge yhat(predicts), y(df, original data) and cutoff
predicts.append(pd.concat([

View file

@ -246,6 +246,15 @@ class Prophet(object):
df[name] = pd.to_numeric(df[name])
if df[name].isnull().any():
raise ValueError('Found NaN in column ' + name)
for props in self.seasonalities.values():
condition_name = props['condition_name']
if condition_name is not None:
if condition_name not in df:
raise ValueError(
'Condition "{}" missing from dataframe'.format(condition_name))
if not df[condition_name].isin([True, False]).all():
raise ValueError('Found non-boolean in column ' + condition_name)
df[condition_name] = df[condition_name].astype('bool')
df = df.sort_values('ds')
df.reset_index(inplace=True, drop=True)
@ -581,7 +590,7 @@ class Prophet(object):
return self
def add_seasonality(
self, name, period, fourier_order, prior_scale=None, mode=None
self, name, period, fourier_order, prior_scale=None, mode=None, condition_name=None
):
"""Add a seasonal component with specified period, number of Fourier
components, and prior scale.
@ -600,6 +609,10 @@ class Prophet(object):
Additive means the seasonality will be added to the trend,
multiplicative means it will multiply the trend.
If condition_name is provided, the dataframe passed to `fit` and `predict`
should have a column with the specified condition_name containing booleans
that decide when to apply the seasonality.
Parameters
----------
name: string name of the seasonality component.
@ -607,6 +620,7 @@ class Prophet(object):
fourier_order: int number of Fourier components to use.
prior_scale: optional float prior scale for this component.
mode: optional 'additive' or 'multiplicative'
condition_name: string name of the seasonality condition.
Returns
-------
@ -628,11 +642,14 @@ class Prophet(object):
mode = self.seasonality_mode
if mode not in ['additive', 'multiplicative']:
raise ValueError("mode must be 'additive' or 'multiplicative'")
if condition_name is not None:
self.validate_column_name(condition_name)
self.seasonalities[name] = {
'period': period,
'fourier_order': fourier_order,
'prior_scale': ps,
'mode': mode,
'condition_name': condition_name,
}
return self
@ -705,6 +722,8 @@ class Prophet(object):
props['fourier_order'],
name,
)
if props['condition_name'] is not None:
features[~df[props['condition_name']]] = 0
seasonal_features.append(features)
prior_scales.extend(
[props['prior_scale']] * features.shape[1])
@ -889,6 +908,7 @@ class Prophet(object):
'fourier_order': fourier_order,
'prior_scale': self.seasonality_prior_scale,
'mode': self.seasonality_mode,
'condition_name': None
}
# Weekly seasonality
@ -902,6 +922,7 @@ class Prophet(object):
'fourier_order': fourier_order,
'prior_scale': self.seasonality_prior_scale,
'mode': self.seasonality_mode,
'condition_name': None
}
# Daily seasonality
@ -915,6 +936,7 @@ class Prophet(object):
'fourier_order': fourier_order,
'prior_scale': self.seasonality_prior_scale,
'mode': self.seasonality_mode,
'condition_name': None
}
@staticmethod

View file

@ -136,14 +136,19 @@ def plot_components(
m=m, fcst=fcst, name='trend', ax=ax, uncertainty=uncertainty,
plot_cap=plot_cap,
)
elif plot_name == 'weekly':
plot_weekly(
m=m, ax=ax, uncertainty=uncertainty, weekly_start=weekly_start,
)
elif plot_name == 'yearly':
plot_yearly(
m=m, ax=ax, uncertainty=uncertainty, yearly_start=yearly_start,
)
elif plot_name in m.seasonalities:
if plot_name == 'weekly' or m.seasonalities[plot_name]['period'] == 7:
plot_weekly(
m=m, name=plot_name, ax=ax, uncertainty=uncertainty, weekly_start=weekly_start
)
elif plot_name == 'yearly' or m.seasonalities[plot_name]['period'] == 365.25:
plot_yearly(
m=m, name=plot_name, ax=ax, uncertainty=uncertainty, yearly_start=yearly_start
)
else:
plot_seasonality(
m=m, name=plot_name, ax=ax, uncertainty=uncertainty,
)
elif plot_name in [
'holidays',
'extra_regressors_additive',
@ -153,10 +158,6 @@ def plot_components(
m=m, fcst=fcst, name=plot_name, ax=ax, uncertainty=uncertainty,
plot_cap=False,
)
else:
plot_seasonality(
m=m, name=plot_name, ax=ax, uncertainty=uncertainty,
)
if plot_name in m.component_modes['multiplicative']:
multiplicative_axes.append(ax)
@ -224,12 +225,16 @@ def seasonality_plot_df(m, ds):
df_dict = {'ds': ds, 'cap': 1., 'floor': 0.}
for name in m.extra_regressors:
df_dict[name] = 0.
# Activate all conditional seasonality columns
for props in m.seasonalities.values():
if props['condition_name'] is not None:
df_dict[props['condition_name']] = True
df = pd.DataFrame(df_dict)
df = m.setup_dataframe(df)
return df
def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6), name='weekly'):
"""Plot the weekly component of the forecast.
Parameters
@ -242,6 +247,7 @@ def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
seasonality plot. 0 (default) starts the week on Sunday. 1 shifts
by 1 day to Monday, and so on.
figsize: Optional tuple width, height in inches.
name: Name of seasonality component if changed from default 'weekly'.
Returns
-------
@ -257,23 +263,23 @@ def plot_weekly(m, ax=None, uncertainty=True, weekly_start=0, figsize=(10, 6)):
df_w = seasonality_plot_df(m, days)
seas = m.predict_seasonal_components(df_w)
days = days.weekday_name
artists += ax.plot(range(len(days)), seas['weekly'], ls='-',
artists += ax.plot(range(len(days)), seas[name], ls='-',
c='#0072B2')
if uncertainty:
artists += [ax.fill_between(range(len(days)),
seas['weekly_lower'], seas['weekly_upper'],
seas[name + '_lower'], seas[name + '_upper'],
color='#0072B2', alpha=0.2)]
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
ax.set_xticks(range(len(days)))
ax.set_xticklabels(days)
ax.set_xlabel('Day of week')
ax.set_ylabel('weekly')
if m.seasonalities['weekly']['mode'] == 'multiplicative':
ax.set_ylabel(name)
if m.seasonalities[name]['mode'] == 'multiplicative':
ax = set_y_as_percent(ax)
return artists
def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6), name='yearly'):
"""Plot the yearly component of the forecast.
Parameters
@ -286,6 +292,7 @@ def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts
by 1 day to Jan 2, and so on.
figsize: Optional tuple width, height in inches.
name: Name of seasonality component if previously changed from default 'yearly'.
Returns
-------
@ -301,19 +308,19 @@ def plot_yearly(m, ax=None, uncertainty=True, yearly_start=0, figsize=(10, 6)):
df_y = seasonality_plot_df(m, days)
seas = m.predict_seasonal_components(df_y)
artists += ax.plot(
df_y['ds'].dt.to_pydatetime(), seas['yearly'], ls='-', c='#0072B2')
df_y['ds'].dt.to_pydatetime(), seas[name], ls='-', c='#0072B2')
if uncertainty:
artists += [ax.fill_between(
df_y['ds'].dt.to_pydatetime(), seas['yearly_lower'],
seas['yearly_upper'], color='#0072B2', alpha=0.2)]
df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
seas[name + '_upper'], color='#0072B2', alpha=0.2)]
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
months = MonthLocator(range(1, 13), bymonthday=1, interval=2)
ax.xaxis.set_major_formatter(FuncFormatter(
lambda x, pos=None: '{dt:%B} {dt.day}'.format(dt=num2date(x))))
ax.xaxis.set_major_locator(months)
ax.set_xlabel('Day of year')
ax.set_ylabel('yearly')
if m.seasonalities['yearly']['mode'] == 'multiplicative':
ax.set_ylabel(name)
if m.seasonalities[name]['mode'] == 'multiplicative':
ax = set_y_as_percent(ax)
return artists

View file

@ -75,8 +75,11 @@ class TestDiagnostics(TestCase):
def test_cross_validation_extra_regressors(self):
df = self.__df.copy()
df['extra'] = range(df.shape[0])
df['is_conditional_week'] = np.arange(df.shape[0]) // 7 % 2
m = Prophet()
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
prior_scale=2., condition_name='is_conditional_week')
m.add_regressor('extra')
m.fit(df)
df_cv = diagnostics.cross_validation(

View file

@ -416,6 +416,7 @@ class TestProphet(TestCase):
'fourier_order': 3,
'prior_scale': 10.,
'mode': 'additive',
'condition_name': None
},
)
# Should be disabled due to too short history
@ -441,6 +442,7 @@ class TestProphet(TestCase):
'fourier_order': 2,
'prior_scale': 3.,
'mode': 'additive',
'condition_name': None
},
)
@ -457,6 +459,7 @@ class TestProphet(TestCase):
'fourier_order': 10,
'prior_scale': 10.,
'mode': 'additive',
'condition_name': None
},
)
# Should be disabled due to too short history
@ -477,6 +480,7 @@ class TestProphet(TestCase):
'fourier_order': 7,
'prior_scale': 3.,
'mode': 'additive',
'condition_name': None
},
)
@ -493,6 +497,7 @@ class TestProphet(TestCase):
'fourier_order': 4,
'prior_scale': 10.,
'mode': 'additive',
'condition_name': None
},
)
# Should be disabled due to too short history
@ -513,6 +518,7 @@ class TestProphet(TestCase):
'fourier_order': 7,
'prior_scale': 3.,
'mode': 'additive',
'condition_name': None
},
)
m = Prophet()
@ -545,6 +551,7 @@ class TestProphet(TestCase):
'fourier_order': 5,
'prior_scale': 2.,
'mode': 'additive',
'condition_name': None
},
)
with self.assertRaises(ValueError):
@ -580,6 +587,43 @@ class TestProphet(TestCase):
self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
self.assertEqual(prior_scales, true)
def test_conditional_custom_seasonality(self):
m = Prophet(weekly_seasonality=False, yearly_seasonality=False)
m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
prior_scale=2., condition_name='is_conditional_week')
m.add_seasonality(name='normal_monthly', period=30.5, fourier_order=5,
prior_scale=2.)
df = DATA.copy()
with self.assertRaises(ValueError):
# Require all condition names in df
m.fit(df)
df['is_conditional_week'] = [0] * 255 + [2] * 255
with self.assertRaises(ValueError):
# Require boolean compatible values
m.fit(df)
df['is_conditional_week'] = [0] * 255 + [1] * 255
m.fit(df)
self.assertEqual(
m.seasonalities['conditional_weekly'],
{
'period': 7,
'fourier_order': 3,
'prior_scale': 2.,
'mode': 'additive',
'condition_name': 'is_conditional_week'
},
)
self.assertIsNone(m.seasonalities['normal_monthly']['condition_name'])
seasonal_features, prior_scales, component_cols, modes = (
m.make_all_seasonality_features(m.history)
)
# Confirm that only rows where is_conditional_week is set have non-zero entries
conditional_weekly_columns = seasonal_features.columns[
seasonal_features.columns.str.startswith('conditional_weekly')]
self.assertTrue(np.array_equal((seasonal_features[conditional_weekly_columns] != 0).any(axis=1).values,
df['is_conditional_week'].values))
def test_added_regressors(self):
m = Prophet()
m.add_regressor('binary_feature', prior_scale=0.2)