add implementation for constant trend in Python (#1466)

* add implementation for constant trend

* force k and delta params to be 0s

* add tests and fix n_changepoints, changepoints_t to 0

* Add test for cv with constant trend

* Add docs and test for checking invalid input

* make changes to stan

* add transformed params block in stan and output flat trend vector

* correct syntax

* transformed params syntax

* Fix test and port changes to win stan file

* add test for flat trend function

* Add separate function for flat trend init

* fix test
This commit is contained in:
Ryan Nazareth 2020-05-15 05:40:40 +01:00 committed by GitHub
parent cb4f1dcc59
commit ac59b44ca3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 212 additions and 56 deletions

File diff suppressed because one or more lines are too long

View file

@ -155,9 +155,9 @@ class Prophet(object):
def validate_inputs(self):
"""Validates the inputs to Prophet."""
if self.growth not in ('linear', 'logistic'):
if self.growth not in ('linear', 'logistic', 'flat'):
raise ValueError(
'Parameter "growth" should be "linear" or "logistic".')
'Parameter "growth" should be "linear", "logistic" or "flat".')
if ((self.changepoint_range < 0) or (self.changepoint_range > 1)):
raise ValueError('Parameter "changepoint_range" must be in [0, 1]')
if self.holidays is not None:
@ -1049,6 +1049,27 @@ class Prophet(object):
k = (L0 - L1) / T
return (k, m)
@staticmethod
def flat_growth_init(df):
"""Initialize flat growth.
Provides a strong initialization for flat growth. Sets the growth to 0
and offset parameter as mean of history y_scaled values.
Parameters
----------
df: pd.DataFrame with columns ds (date), y_scaled (scaled time series),
and t (scaled time).
Returns
-------
A tuple (k, m) with the rate (k) and offset (m) of the linear growth
function.
"""
k = 0
m = df['y_scaled'].mean()
return k, m
def fit(self, df, **kwargs):
"""Fit the Prophet model.
@ -1098,6 +1119,8 @@ class Prophet(object):
self.set_changepoints()
trend_indicator = {'linear': 0, 'logistic': 1, 'flat': 2}
dat = {
'T': history.shape[0],
'K': seasonal_features.shape[1],
@ -1108,7 +1131,7 @@ class Prophet(object):
'X': seasonal_features,
'sigmas': prior_scales,
'tau': self.changepoint_prior_scale,
'trend_indicator': int(self.growth == 'logistic'),
'trend_indicator': trend_indicator[self.growth],
's_a': component_cols['additive_terms'],
's_m': component_cols['multiplicative_terms'],
}
@ -1116,6 +1139,9 @@ class Prophet(object):
if self.growth == 'linear':
dat['cap'] = np.zeros(self.history.shape[0])
kinit = self.linear_growth_init(history)
elif self.growth == 'flat':
dat['cap'] = np.zeros(self.history.shape[0])
kinit = self.flat_growth_init(history)
else:
dat['cap'] = history['cap_scaled']
kinit = self.logistic_growth_init(history)
@ -1128,9 +1154,8 @@ class Prophet(object):
'sigma_obs': 1,
}
if (history['y'].min() == history['y'].max()
and self.growth == 'linear'):
# Nothing to fit.
if history['y'].min() == history['y'].max() and \
(self.growth == 'linear' or self.growth == 'flat'):
self.params = stan_init
self.params['sigma_obs'] = 1e-9
for par in self.params:
@ -1255,6 +1280,22 @@ class Prophet(object):
m_t[indx] += gammas[s]
return cap / (1 + np.exp(-k_t * (t - m_t)))
@staticmethod
def flat_trend(t, m):
"""Evaluate the flat trend function.
Parameters
----------
t: np.array of times on which the function is evaluated.
m: Float initial offset.
Returns
-------
Vector y(t).
"""
m_t = m * np.ones_like(t)
return m_t
def predict_trend(self, df):
"""Predict trend using the prophet model.
@ -1273,10 +1314,13 @@ class Prophet(object):
t = np.array(df['t'])
if self.growth == 'linear':
trend = self.piecewise_linear(t, deltas, k, m, self.changepoints_t)
else:
elif self.growth == 'logistic':
cap = df['cap_scaled']
trend = self.piecewise_logistic(
t, cap, deltas, k, m, self.changepoints_t)
elif self.growth == 'flat':
# constant trend
trend = self.flat_trend(t, m)
return trend * self.y_scale + df['floor']
@ -1470,10 +1514,12 @@ class Prophet(object):
if self.growth == 'linear':
trend = self.piecewise_linear(t, deltas, k, m, changepoint_ts)
else:
elif self.growth == 'logistic':
cap = df['cap_scaled']
trend = self.piecewise_logistic(t, cap, deltas, k, m,
changepoint_ts)
elif self.growth == 'flat':
trend = self.flat_trend(t, m)
return trend * self.y_scale + df['floor']

View file

@ -106,18 +106,21 @@ class TestDiagnostics(TestCase):
self.assertEqual(diagnostics.single_cutoff_forecast.call_count,
forecasts)
def test_cross_validation_logistic(self):
df = self.__df.copy()
df['cap'] = 40
m = Prophet(growth='logistic').fit(df)
df_cv = diagnostics.cross_validation(
m, horizon='1 days', period='1 days', initial='140 days')
self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
self.assertAlmostEqual(
np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
def test_cross_validation_logistic_or_flat_growth(self):
params = (x for x in ['logistic', 'flat'])
for growth in params:
with self.subTest(i=growth):
df = self.__df.copy()
if growth == "logistic":
df['cap'] = 40
m = Prophet(growth=growth).fit(df)
df_cv = diagnostics.cross_validation(
m, horizon='1 days', period='1 days', initial='140 days')
self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
self.assertAlmostEqual(
np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
def test_cross_validation_extra_regressors(self):
df = self.__df.copy()

View file

@ -206,22 +206,41 @@ class TestProphet(TestCase):
# Check for approximate shift invariance
self.assertTrue((np.abs(fcst1['yhat'] - fcst2['yhat']) < 1).all())
def test_get_changepoints(self):
m = Prophet()
def test_flat_growth(self):
m = Prophet(growth='flat')
N = DATA.shape[0]
history = DATA.head(N // 2).copy()
m.fit(history)
future = m.make_future_dataframe(N // 2, include_history=False)
fcst = m.predict(future)
m_ = m.params['m']
k = m.params['k']
self.assertEqual(k[0, 0], 0)
self.assertEqual(fcst['trend'].unique(), m_*m.y_scale)
self.assertEqual(np.round(m_[0,0]*m.y_scale), 26)
history = m.setup_dataframe(history, initialize_scales=True)
m.history = history
def test_invalid_growth_input(self):
msg = 'Parameter "growth" should be "linear", ' \
'"logistic" or "flat".'
with self.assertRaisesRegex(ValueError, msg):
Prophet(growth="constant")
m.set_changepoints()
def test_get_changepoints(self):
m = Prophet()
N = DATA.shape[0]
history = DATA.head(N // 2).copy()
cp = m.changepoints_t
self.assertEqual(cp.shape[0], m.n_changepoints)
self.assertEqual(len(cp.shape), 1)
self.assertTrue(cp.min() > 0)
cp_indx = int(np.ceil(0.8 * history.shape[0]))
self.assertTrue(cp.max() <= history['t'].values[cp_indx])
history = m.setup_dataframe(history, initialize_scales=True)
m.history = history
m.set_changepoints()
cp = m.changepoints_t
self.assertEqual(cp.shape[0], m.n_changepoints)
self.assertEqual(len(cp.shape), 1)
self.assertTrue(cp.min() > 0)
cp_indx = int(np.ceil(0.8 * history.shape[0]))
self.assertTrue(cp.max() <= history['t'].values[cp_indx])
def test_set_changepoint_range(self):
m = Prophet(changepoint_range=0.4)
@ -301,6 +320,10 @@ class TestProphet(TestCase):
self.assertAlmostEqual(k, 1.507925, places=4)
self.assertAlmostEqual(m, -0.08167497, places=4)
k,m = model.flat_growth_init(history)
self.assertEqual(k, 0)
self.assertAlmostEqual(m, 0.49335657, places=4)
def test_piecewise_linear(self):
model = Prophet()
@ -342,6 +365,18 @@ class TestProphet(TestCase):
y = model.piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
self.assertAlmostEqual((y - y_true).sum(), 0.0, places=5)
def test_flat_trend(self):
model = Prophet()
t = np.arange(11)
m = 0.5
y = model.flat_trend(t, m)
y_true = np.array([0.5]*11)
self.assertEqual((y - y_true).sum(), 0)
t = t[8:]
y_true = y_true[8:]
y = model.flat_trend(t, m)
self.assertEqual((y - y_true).sum(), 0)
def test_holidays(self):
holidays = pd.DataFrame({
'ds': pd.to_datetime(['2016-12-25']),

View file

@ -73,6 +73,15 @@ functions {
) {
return (k + A * delta) .* t + (m + A * (-t_change .* delta));
}
// Flat trend function
vector flat_trend(
real m,
int T
) {
return rep_vector(m, T);
}
}
data {
@ -86,7 +95,7 @@ data {
matrix[T,K] X; // Regressors
vector[K] sigmas; // Scale on seasonality prior
real<lower=0> tau; // Scale on changepoints prior
int trend_indicator; // 0 for linear, 1 for logistic
int trend_indicator; // 0 for linear, 1 for logistic, 2 for flat
vector[K] s_a; // Indicator of additive features
vector[K] s_m; // Indicator of multiplicative features
}
@ -104,6 +113,17 @@ parameters {
vector[K] beta; // Regressor coefficients
}
transformed parameters {
vector[T] trend;
if (trend_indicator == 0) {
trend = linear_trend(k, m, delta, t, A, t_change);
} else if (trend_indicator == 1) {
trend = logistic_trend(k, m, delta, t, cap, A, t_change, S);
} else if (trend_indicator == 2) {
trend = flat_trend(m, T);
}
}
model {
//priors
k ~ normal(0, 5);
@ -113,19 +133,10 @@ model {
beta ~ normal(0, sigmas);
// Likelihood
if (trend_indicator == 0) {
y ~ normal(
linear_trend(k, m, delta, t, A, t_change)
.* (1 + X * (beta .* s_m))
+ X * (beta .* s_a),
sigma_obs
);
} else if (trend_indicator == 1) {
y ~ normal(
logistic_trend(k, m, delta, t, cap, A, t_change, S)
.* (1 + X * (beta .* s_m))
+ X * (beta .* s_a),
sigma_obs
);
}
y ~ normal(
trend
.* (1 + X * (beta .* s_m))
+ X * (beta .* s_a),
sigma_obs
);
}

View file

@ -94,6 +94,17 @@ functions {
}
return Y;
}
// Flat trend function
real[] flat_trend(
real m,
int T
) {
return rep_array(m, T);
}
}
data {
@ -107,7 +118,7 @@ data {
real X[T,K]; // Regressors
vector[K] sigmas; // Scale on seasonality prior
real<lower=0> tau; // Scale on changepoints prior
int trend_indicator; // 0 for linear, 1 for logistic
int trend_indicator; // 0 for linear, 1 for logistic, 2 for flat
real s_a[K]; // Indicator of additive features
real s_m[K]; // Indicator of multiplicative features
}
@ -135,6 +146,8 @@ transformed parameters {
trend = linear_trend(k, m, delta, t, A, t_change, S, T);
} else if (trend_indicator == 1) {
trend = logistic_trend(k, m, delta, t, cap, A, t_change, S, T);
} else if (trend_indicator == 2){
trend = flat_trend(m, T);
}
for (i in 1:K) {