mirror of
https://github.com/saymrwulf/prophet.git
synced 2026-05-16 21:00:16 +00:00
Add regressor_coefficients function for R (#1803)
* function code * add tests for regressor coefficients utility * add documentation for regressor_coefficients util function * generate Rd docs * add regressor_coefficients to R namespace * minor formatting nit * fix bugs
This commit is contained in:
parent
e95d7c5155
commit
2d56e71fed
6 changed files with 185 additions and 26 deletions
|
|
@ -16,6 +16,7 @@ export(plot_forecast_component)
|
|||
export(predictive_samples)
|
||||
export(prophet)
|
||||
export(prophet_plot_components)
|
||||
export(regressor_coefficients)
|
||||
import(Rcpp)
|
||||
import(RcppParallel, except = LdFlags)
|
||||
import(rlang)
|
||||
|
|
|
|||
67
R/R/utilities.R
Normal file
67
R/R/utilities.R
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
#' Summarise the coefficients of the extra regressors used in the model.
|
||||
#' For additive regressors, the coefficient represents the incremental impact
|
||||
#' on \code{y} of a unit increase in the regressor. For multiplicative regressors,
|
||||
#' the incremental impact is equal to \code{trend(t)} multiplied by the coefficient.
|
||||
#'
|
||||
#' \textbf{Coefficients are measured on the original scale of the training data.}
|
||||
#'
|
||||
#' @param m Prophet model object, after fitting.
|
||||
#'
|
||||
#' @return Dataframe with one row per regressor.
|
||||
#' @details Output dataframe columns:
|
||||
#' \itemize{
|
||||
#' \item{regressor: Name of the regressor}
|
||||
#' \item{regressor_mode: Whether the regressor has an additive or multiplicative
|
||||
#' effect on \code{y}.}
|
||||
#' \item{center: The mean of the regressor if it was standardized. Otherwise 0.}
|
||||
#' \item{coef_lower: Lower bound for the coefficient, estimated from the MCMC samples.
|
||||
#' Only different to \code{coef} if \code{mcmc_samples > 0}.
|
||||
#' }
|
||||
#' \item{coef: Expected value of the coefficient.}
|
||||
#' \item{coef_upper: Upper bound for the coefficient, estimated from MCMC samples.
|
||||
#' Only to different to \code{coef} if \code{mcmc_samples > 0}.
|
||||
#' }
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
regressor_coefficients <- function(m){
|
||||
if (length(m$extra_regressors) == 0) {
|
||||
stop("No extra regressors found.")
|
||||
}
|
||||
regr_names <- names(m$extra_regressors)
|
||||
regr_modes <- unlist(lapply(m$extra_regressors, function(x) x$mode))
|
||||
regr_mus <- unlist(lapply(m$extra_regressors, function (x) x$mu))
|
||||
regr_stds <- unlist(lapply(m$extra_regressors, function(x) x$std))
|
||||
|
||||
beta_indices <- which(m$train.component.cols[, regr_names] == 1, arr.ind = TRUE)[, "row"]
|
||||
betas <- m$params$beta[, beta_indices, drop = FALSE]
|
||||
# If regressor is additive, multiply by the scale factor to put coefficients on the original training data scale.
|
||||
y_scale_indicator <- matrix(
|
||||
data = ifelse(regr_modes == "additive", m$y.scale, 1),
|
||||
nrow = nrow(betas),
|
||||
ncol = ncol(betas),
|
||||
byrow = TRUE
|
||||
)
|
||||
coefs <- betas * y_scale_indicator / regr_stds
|
||||
|
||||
percentiles = c((1 - m$interval.width) / 2, 1 - (1 - m$interval.width) / 2)
|
||||
bounds <- apply(betas, 2, quantile, probs = percentiles)
|
||||
|
||||
df <- data.frame(
|
||||
regressor = regr_names,
|
||||
regressor_mode = regr_modes,
|
||||
center = regr_mus,
|
||||
coef_lower = bounds[1, ],
|
||||
coef = apply(betas, 2, mean),
|
||||
coef_upper = bounds[2, ],
|
||||
stringsAsFactors = FALSE,
|
||||
row.names = NULL
|
||||
)
|
||||
|
||||
return(df)
|
||||
}
|
||||
36
R/man/regressor_coefficients.Rd
Normal file
36
R/man/regressor_coefficients.Rd
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utilities.R
|
||||
\name{regressor_coefficients}
|
||||
\alias{regressor_coefficients}
|
||||
\title{Summarise the coefficients of the extra regressors used in the model.
|
||||
For additive regressors, the coefficient represents the incremental impact
|
||||
on \code{y} of a unit increase in the regressor. For multiplicative regressors,
|
||||
the incremental impact is equal to \code{trend(t)} multiplied by the coefficient.}
|
||||
\usage{
|
||||
regressor_coefficients(m)
|
||||
}
|
||||
\arguments{
|
||||
\item{m}{Prophet model object, after fitting.}
|
||||
}
|
||||
\value{
|
||||
Dataframe with one row per regressor.
|
||||
}
|
||||
\description{
|
||||
\textbf{Coefficients are measured on the original scale of the training data.}
|
||||
}
|
||||
\details{
|
||||
Output dataframe columns:
|
||||
\itemize{
|
||||
\item{regressor: Name of the regressor}
|
||||
\item{regressor_mode: Whether the regressor has an additive or multiplicative
|
||||
effect on \code{y}.}
|
||||
\item{center: The mean of the regressor if it was standardized. Otherwise 0.}
|
||||
\item{coef_lower: Lower bound for the coefficient, estimated from the MCMC samples.
|
||||
Only different to \code{coef} if \code{mcmc_samples > 0}.
|
||||
}
|
||||
\item{coef: Expected value of the coefficient.}
|
||||
\item{coef_upper: Upper bound for the coefficient, estimated from MCMC samples.
|
||||
Only to different to \code{coef} if \code{mcmc_samples > 0}.
|
||||
}
|
||||
}
|
||||
}
|
||||
48
R/tests/testthat/test_utilities.R
Normal file
48
R/tests/testthat/test_utilities.R
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
library(prophet)
|
||||
context("Prophet utilities tests")
|
||||
|
||||
DATA <- read.csv('data.csv')
|
||||
DATA$ds <- as.Date(DATA$ds)
|
||||
|
||||
build_model_with_regressors <- function(data, mcmc.samples = 0) {
|
||||
m <- prophet(mcmc.samples = mcmc.samples)
|
||||
m <- add_regressor(m, 'binary_feature', prior.scale=0.2)
|
||||
m <- add_regressor(m, 'numeric_feature', prior.scale=0.5)
|
||||
m <- add_regressor(
|
||||
m, 'numeric_feature2', prior.scale=0.5, mode = 'multiplicative')
|
||||
m <- add_regressor(m, 'binary_feature2', standardize=TRUE)
|
||||
|
||||
df <- data
|
||||
df$binary_feature <- c(rep(0, 255), rep(1, 255))
|
||||
df$numeric_feature <- 0:509
|
||||
df$numeric_feature2 <- 0:509
|
||||
df$binary_feature2 <- c(rep(1, 100), rep(0, 410))
|
||||
m <- fit.prophet(m, df)
|
||||
|
||||
return(m)
|
||||
}
|
||||
|
||||
test_that("regressor_coefficients_no_uncertainty", {
|
||||
skip_if_not(Sys.getenv('R_ARCH') != '/i386')
|
||||
m <- build_model_with_regressors(DATA, mcmc.samples = 0)
|
||||
coefs <- regressor_coefficients(m)
|
||||
|
||||
expect_equal(dim(coefs), c(4, 6))
|
||||
expect_equal(coefs[, "coef_lower"], coefs[, "coef"])
|
||||
expect_equal(coefs[, "coef_upper"], coefs[, "coef"])
|
||||
})
|
||||
|
||||
test_that("regressor_coefficients_with_uncertainty", {
|
||||
skip_if_not(Sys.getenv('R_ARCH') != '/i386')
|
||||
suppressWarnings(m <- build_model_with_regressors(DATA, mcmc.samples = 100))
|
||||
coefs <- regressor_coefficients(m)
|
||||
|
||||
expect_equal(dim(coefs), c(4, 6))
|
||||
expect_true(all(coefs[, "coef_lower"] < coefs[, "coef"]))
|
||||
expect_true(all(coefs[, "coef_upper"] > coefs[, "coef"]))
|
||||
})
|
||||
|
|
@ -97,8 +97,8 @@ The holiday effect can be seen in the `forecast` dataframe:
|
|||
|
||||
```R
|
||||
# R
|
||||
forecast %>%
|
||||
select(ds, playoff, superbowl) %>%
|
||||
forecast %>%
|
||||
select(ds, playoff, superbowl) %>%
|
||||
filter(abs(playoff + superbowl) > 0) %>%
|
||||
tail(10)
|
||||
```
|
||||
|
|
@ -211,8 +211,8 @@ prophet_plot_components(m, forecast)
|
|||
# Python
|
||||
fig = m.plot_components(forecast)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
Individual holidays can be plotted using the `plot_forecast_component` function (imported from `fbprophet.plot` in Python) like `plot_forecast_component(m, forecast, 'superbowl')` to plot just the superbowl holiday component.
|
||||
|
|
@ -247,14 +247,14 @@ You can see which holidays were included by looking at the `train_holiday_names`
|
|||
m$train.holiday.names
|
||||
```
|
||||
|
||||
[1] "playoff" "superbowl"
|
||||
[1] "playoff" "superbowl"
|
||||
[3] "New Year's Day" "Martin Luther King, Jr. Day"
|
||||
[5] "Washington's Birthday" "Memorial Day"
|
||||
[7] "Independence Day" "Labor Day"
|
||||
[9] "Columbus Day" "Veterans Day"
|
||||
[11] "Veterans Day (Observed)" "Thanksgiving"
|
||||
[5] "Washington's Birthday" "Memorial Day"
|
||||
[7] "Independence Day" "Labor Day"
|
||||
[9] "Columbus Day" "Veterans Day"
|
||||
[11] "Veterans Day (Observed)" "Thanksgiving"
|
||||
[13] "Christmas Day" "Independence Day (Observed)"
|
||||
[15] "Christmas Day (Observed)" "New Year's Day (Observed)"
|
||||
[15] "Christmas Day (Observed)" "New Year's Day (Observed)"
|
||||
|
||||
|
||||
|
||||
|
|
@ -306,8 +306,8 @@ prophet_plot_components(m, forecast)
|
|||
forecast = m.predict(future)
|
||||
fig = m.plot_components(forecast)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
<a id="fourier-order-for-seasonalities"> </a>
|
||||
|
|
@ -330,8 +330,8 @@ from fbprophet.plot import plot_yearly
|
|||
m = Prophet().fit(df)
|
||||
a = plot_yearly(m)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
The default values are often appropriate, but they can be increased when the seasonality needs to fit higher-frequency changes, and generally be less smooth. The Fourier order can be specified for each built-in seasonality when instantiating the model, here it is increased to 20:
|
||||
|
|
@ -348,8 +348,8 @@ from fbprophet.plot import plot_yearly
|
|||
m = Prophet(yearly_seasonality=20).fit(df)
|
||||
a = plot_yearly(m)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
Increasing the number of Fourier terms allows the seasonality to fit faster changing cycles, but can also lead to overfitting: N Fourier terms corresponds to 2N variables used for modeling the cycle
|
||||
|
|
@ -388,8 +388,8 @@ m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
|
|||
forecast = m.fit(df).predict(future)
|
||||
fig = m.plot_components(forecast)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
<a id="seasonalities-that-depend-on-other-factors"> </a>
|
||||
|
|
@ -452,8 +452,8 @@ future['off_season'] = ~future['ds'].apply(is_nfl_season)
|
|||
forecast = m.fit(df).predict(future)
|
||||
fig = m.plot_components(forecast)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
Both of the seasonalities now show up in the components plots above. We can see that during the on-season when games are played every Sunday, there are large increases on Sunday and Monday that are completely absent during the off-season.
|
||||
|
|
@ -470,8 +470,8 @@ If you find that the holidays are overfitting, you can adjust their prior scale
|
|||
# R
|
||||
m <- prophet(df, holidays = holidays, holidays.prior.scale = 0.05)
|
||||
forecast <- predict(m, future)
|
||||
forecast %>%
|
||||
select(ds, playoff, superbowl) %>%
|
||||
forecast %>%
|
||||
select(ds, playoff, superbowl) %>%
|
||||
filter(abs(playoff + superbowl) > 0) %>%
|
||||
tail(10)
|
||||
```
|
||||
|
|
@ -640,8 +640,8 @@ future['nfl_sunday'] = future['ds'].apply(nfl_sunday)
|
|||
forecast = m.predict(future)
|
||||
fig = m.plot_components(forecast)
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
NFL Sundays could also have been handled using the "holidays" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known.
|
||||
|
|
@ -662,3 +662,6 @@ The extra regressor must be known for both the history and for future dates. It
|
|||
|
||||
Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity).
|
||||
|
||||
#### Coefficients of additional regressors
|
||||
|
||||
To extract the beta coefficients of the extra regressors, use the utility function `regressor_coefficients` (`from fbprophet.utilities import regressor_coefficients` in Python, `prophet::regressor_coefficients` in R) on the fitted model. The estimated beta coefficient for each regressor roughly represents the increase in prediction value for a unit increase in the regressor value (note that the coefficients returned are always on the scale of the original data). If `mcmc_samples` is specified, a credible interval for each coefficient is also returned, which can help identify whether each regressor is "statistically significant".
|
||||
|
|
|
|||
|
|
@ -1199,7 +1199,11 @@
|
|||
"\n",
|
||||
"The extra regressor must be known for both the history and for future dates. It thus must either be something that has known future values (such as `nfl_sunday`), or something that has separately been forecasted elsewhere. The weather regressors used in the notebook linked above is a good example of an extra regressor that has forecasts that can be used for future values. One can also use as a regressor another time series that has been forecasted with a time series model, such as Prophet. For instance, if `r(t)` is included as a regressor for `y(t)`, Prophet can be used to forecast `r(t)` and then that forecast can be plugged in as the future values when forecasting `y(t)`. A note of caution around this approach: This will probably not be useful unless `r(t)` is somehow easier to forecast then `y(t)`. This is because error in the forecast of `r(t)` will produce error in the forecast of `y(t)`. One setting where this can be useful is in hierarchical time series, where there is top-level forecast that has higher signal-to-noise and is thus easier to forecast. Its forecast can be included in the forecast for each lower-level series.\n",
|
||||
"\n",
|
||||
"Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity)."
|
||||
"Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity).\n",
|
||||
"\n",
|
||||
"#### Coefficients of additional regressors\n",
|
||||
"\n",
|
||||
"To extract the beta coefficients of the extra regressors, use the utility function `regressor_coefficients` (`from fbprophet.utilities import regressor_coefficients` in Python, `prophet::regressor_coefficients` in R) on the fitted model. The estimated beta coefficient for each regressor roughly represents the increase in prediction value for a unit increase in the regressor value (note that the coefficients returned are always on the scale of the original data). If `mcmc_samples` is specified, a credible interval for each coefficient is also returned, which can help identify whether each regressor is \"statistically significant\"."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -1225,4 +1229,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue