From feb7be397bc4d4932d03b08ba241d7a538ae03cb Mon Sep 17 00:00:00 2001 From: bl Date: Sat, 4 Nov 2017 16:40:20 -0700 Subject: [PATCH] Fix R warning with extra regressor; disallow constant extra regressors. --- R/R/prophet.R | 9 +++++---- R/tests/testthat/test_prophet.R | 5 +++++ docs/_docs/seasonality_and_holiday_effects.md | 2 +- notebooks/seasonality_and_holiday_effects.ipynb | 2 +- python/fbprophet/forecaster.py | 5 +++-- python/fbprophet/tests/test_prophet.py | 6 ++++++ 6 files changed, 21 insertions(+), 8 deletions(-) diff --git a/R/R/prophet.R b/R/R/prophet.R index a1daaa1..506615a 100644 --- a/R/R/prophet.R +++ b/R/R/prophet.R @@ -395,9 +395,13 @@ initialize_scales_fn <- function(m, initialize_scales, df) { m$start <- min(df$ds) m$t.scale <- time_diff(max(df$ds), m$start, "secs") for (name in names(m$extra_regressors)) { + n.vals <- length(unique(df[[name]])) + if (n.vals < 2) { + stop('Regressor ', name, ' is constant.') + } standardize <- m$extra_regressors[[name]]$standardize if (standardize == 'auto') { - if (all(sort(unique(df[[name]])) == c(0, 1))) { + if (n.vals == 2 && all(sort(unique(df[[name]])) == c(0, 1))) { # Don't standardize binary variables standardize <- FALSE } else { @@ -407,9 +411,6 @@ initialize_scales_fn <- function(m, initialize_scales, df) { if (standardize) { mu <- mean(df[[name]]) std <- stats::sd(df[[name]]) - if (std == 0) { - std <- mu - } m$extra_regressors[[name]]$mu <- mu m$extra_regressors[[name]]$std <- std } diff --git a/R/tests/testthat/test_prophet.R b/R/tests/testthat/test_prophet.R index 313b1ef..ad72585 100644 --- a/R/tests/testthat/test_prophet.R +++ b/R/tests/testthat/test_prophet.R @@ -511,6 +511,11 @@ test_that("added_regressors", { expect_equal(fcst$seasonal[1], fcst$seasonalities[1] + fcst$extra_regressors[1]) expect_equal(fcst$yhat[1], fcst$trend[1] + fcst$seasonal[1]) + # Check fails if constant extra regressor + df$constant_feature <- 5 + m <- prophet() + m <- add_regressor(m, 'constant_feature') + expect_error(fit.prophet(m, df)) }) test_that("copy", { diff --git a/docs/_docs/seasonality_and_holiday_effects.md b/docs/_docs/seasonality_and_holiday_effects.md index 6c49caf..ad3d757 100644 --- a/docs/_docs/seasonality_and_holiday_effects.md +++ b/docs/_docs/seasonality_and_holiday_effects.md @@ -362,6 +362,6 @@ m.plot_components(forecast); ![png](/prophet/static/seasonality_and_holiday_effects_files/seasonality_and_holiday_effects_26_0.png) -NFL Sundays could also have been handled using the "holidays" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known. +NFL Sundays could also have been handled using the "holidays" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known. The regressor cannot be constant in the training data; fitting will exit with an error if it is. The `add_regressor` function has optional arguments for specifying the prior scale (holiday prior scale is used by default) and whether or not the regressor is standardized - see the docstring with `help(Prophet.add_regressor)` in Python and `?add_regressor` in R. diff --git a/notebooks/seasonality_and_holiday_effects.ipynb b/notebooks/seasonality_and_holiday_effects.ipynb index 9e4d17a..160d07c 100644 --- a/notebooks/seasonality_and_holiday_effects.ipynb +++ b/notebooks/seasonality_and_holiday_effects.ipynb @@ -728,7 +728,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "NFL Sundays could also have been handled using the \"holidays\" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known.\n", + "NFL Sundays could also have been handled using the \"holidays\" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known. The regressor cannot be constant in the training data; fitting will exit with an error if it is.\n", "\n", "The `add_regressor` function has optional arguments for specifying the prior scale (holiday prior scale is used by default) and whether or not the regressor is standardized - see the docstring with `help(Prophet.add_regressor)` in Python and `?add_regressor` in R." ] diff --git a/python/fbprophet/forecaster.py b/python/fbprophet/forecaster.py index 3f0c323..8a07a11 100644 --- a/python/fbprophet/forecaster.py +++ b/python/fbprophet/forecaster.py @@ -278,6 +278,9 @@ class Prophet(object): self.t_scale = df['ds'].max() - self.start for name, props in self.extra_regressors.items(): standardize = props['standardize'] + n_vals = len(df[name].unique()) + if n_vals < 2: + raise ValueError('Regressor {} is constant.'.format(name)) if standardize == 'auto': if set(df[name].unique()) == set([1, 0]): # Don't standardize binary variables. @@ -287,8 +290,6 @@ class Prophet(object): if standardize: mu = df[name].mean() std = df[name].std() - if std == 0: - std = mu self.extra_regressors[name]['mu'] = mu self.extra_regressors[name]['std'] = std diff --git a/python/fbprophet/tests/test_prophet.py b/python/fbprophet/tests/test_prophet.py index ac5b3e9..cea923e 100644 --- a/python/fbprophet/tests/test_prophet.py +++ b/python/fbprophet/tests/test_prophet.py @@ -548,6 +548,12 @@ class TestProphet(TestCase): fcst['yhat'][0], fcst['trend'][0] + fcst['seasonal'][0], ) + # Check fails if constant extra regressor + df['constant_feature'] = 5 + m = Prophet() + m.add_regressor('constant_feature') + with self.assertRaises(ValueError): + m.fit(df.copy()) def test_copy(self): # These values are created except for its default values