prophet/R/R/prophet.R
2017-09-01 16:51:55 -07:00

1730 lines
55 KiB
R

## Copyright (c) 2017-present, Facebook, Inc.
## All rights reserved.
## This source code is licensed under the BSD-style license found in the
## LICENSE file in the root directory of this source tree. An additional grant
## of patent rights can be found in the PATENTS file in the same directory.
## Makes R CMD CHECK happy due to dplyr syntax below
globalVariables(c(
"ds", "y", "cap", ".",
"component", "dow", "doy", "holiday", "holidays", "holidays_lower", "holidays_upper", "ix",
"lower", "n", "stat", "trend", "row_number", "extra_regressors", "col",
"trend_lower", "trend_upper", "upper", "value", "weekly", "weekly_lower", "weekly_upper",
"x", "yearly", "yearly_lower", "yearly_upper", "yhat", "yhat_lower", "yhat_upper"))
#' Prophet forecaster.
#'
#' @param df (optional) Dataframe containing the history. Must have columns ds
#' (date type) and y, the time series. If growth is logistic, then df must
#' also have a column cap that specifies the capacity at each ds. If not
#' provided, then the model object will be instantiated but not fit; use
#' fit.prophet(m, df) to fit the model.
#' @param growth String 'linear' or 'logistic' to specify a linear or logistic
#' trend.
#' @param changepoints Vector of dates at which to include potential
#' changepoints. If not specified, potential changepoints are selected
#' automatically.
#' @param n.changepoints Number of potential changepoints to include. Not used
#' if input `changepoints` is supplied. If `changepoints` is not supplied,
#' then n.changepoints potential changepoints are selected uniformly from the
#' first 80 percent of df$ds.
#' @param yearly.seasonality Fit yearly seasonality. Can be 'auto', TRUE,
#' FALSE, or a number of Fourier terms to generate.
#' @param weekly.seasonality Fit weekly seasonality. Can be 'auto', TRUE,
#' FALSE, or a number of Fourier terms to generate.
#' @param daily.seasonality Fit daily seasonality. Can be 'auto', TRUE,
#' FALSE, or a number of Fourier terms to generate.
#' @param holidays data frame with columns holiday (character) and ds (date
#' type)and optionally columns lower_window and upper_window which specify a
#' range of days around the date to be included as holidays. lower_window=-2
#' will include 2 days prior to the date as holidays. Also optionally can have
#' a column prior_scale specifying the prior scale for each holiday.
#' @param seasonality.prior.scale Parameter modulating the strength of the
#' seasonality model. Larger values allow the model to fit larger seasonal
#' fluctuations, smaller values dampen the seasonality. Can be specified for
#' individual seasonalities using add_seasonality.
#' @param holidays.prior.scale Parameter modulating the strength of the holiday
#' components model, unless overridden in the holidays input.
#' @param changepoint.prior.scale Parameter modulating the flexibility of the
#' automatic changepoint selection. Large values will allow many changepoints,
#' small values will allow few changepoints.
#' @param mcmc.samples Integer, if greater than 0, will do full Bayesian
#' inference with the specified number of MCMC samples. If 0, will do MAP
#' estimation.
#' @param interval.width Numeric, width of the uncertainty intervals provided
#' for the forecast. If mcmc.samples=0, this will be only the uncertainty
#' in the trend using the MAP estimate of the extrapolated generative model.
#' If mcmc.samples>0, this will be integrated over all model parameters,
#' which will include uncertainty in seasonality.
#' @param uncertainty.samples Number of simulated draws used to estimate
#' uncertainty intervals.
#' @param fit Boolean, if FALSE the model is initialized but not fit.
#' @param ... Additional arguments, passed to \code{\link{fit.prophet}}
#'
#' @return A prophet model.
#'
#' @examples
#' \dontrun{
#' history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
#' y = sin(1:366/200) + rnorm(366)/10)
#' m <- prophet(history)
#' }
#'
#' @export
#' @importFrom dplyr "%>%"
#' @import Rcpp
prophet <- function(df = NULL,
growth = 'linear',
changepoints = NULL,
n.changepoints = 25,
yearly.seasonality = 'auto',
weekly.seasonality = 'auto',
daily.seasonality = 'auto',
holidays = NULL,
seasonality.prior.scale = 10,
holidays.prior.scale = 10,
changepoint.prior.scale = 0.05,
mcmc.samples = 0,
interval.width = 0.80,
uncertainty.samples = 1000,
fit = TRUE,
...
) {
# fb-block 1
if (!is.null(changepoints)) {
n.changepoints <- length(changepoints)
}
m <- list(
growth = growth,
changepoints = changepoints,
n.changepoints = n.changepoints,
yearly.seasonality = yearly.seasonality,
weekly.seasonality = weekly.seasonality,
daily.seasonality = daily.seasonality,
holidays = holidays,
seasonality.prior.scale = seasonality.prior.scale,
changepoint.prior.scale = changepoint.prior.scale,
holidays.prior.scale = holidays.prior.scale,
mcmc.samples = mcmc.samples,
interval.width = interval.width,
uncertainty.samples = uncertainty.samples,
specified.changepoints = !is.null(changepoints),
start = NULL, # This and following attributes are set during fitting
y.scale = NULL,
logistic.floor = FALSE,
t.scale = NULL,
changepoints.t = NULL,
seasonalities = list(),
extra_regressors = list(),
stan.fit = NULL,
params = list(),
history = NULL,
history.dates = NULL
)
validate_inputs(m)
class(m) <- append("prophet", class(m))
if ((fit) && (!is.null(df))) {
m <- fit.prophet(m, df, ...)
}
# fb-block 2
return(m)
}
#' Validates the inputs to Prophet.
#'
#' @param m Prophet object.
#'
#' @keywords internal
validate_inputs <- function(m) {
if (!(m$growth %in% c('linear', 'logistic'))) {
stop("Parameter 'growth' should be 'linear' or 'logistic'.")
}
if (!is.null(m$holidays)) {
if (!(exists('holiday', where = m$holidays))) {
stop('Holidays dataframe must have holiday field.')
}
if (!(exists('ds', where = m$holidays))) {
stop('Holidays dataframe must have ds field.')
}
has.lower <- exists('lower_window', where = m$holidays)
has.upper <- exists('upper_window', where = m$holidays)
if (has.lower + has.upper == 1) {
stop(paste('Holidays must have both lower_window and upper_window,',
'or neither.'))
}
if (has.lower) {
if(max(m$holidays$lower_window, na.rm=TRUE) > 0) {
stop('Holiday lower_window should be <= 0')
}
if(min(m$holidays$upper_window, na.rm=TRUE) < 0) {
stop('Holiday upper_window should be >= 0')
}
}
for (h in unique(m$holidays$holiday)) {
validate_column_name(m, h, check_holidays = FALSE)
}
}
}
#' Validates the name of a seasonality, holiday, or regressor.
#'
#' @param m Prophet object.
#' @param name string
#' @param check_holidays bool check if name already used for holiday
#' @param check_seasonalities bool check if name already used for seasonality
#' @param check_regressors bool check if name already used for regressor
#'
#' @keywords internal
validate_column_name <- function(
m, name, check_holidays = TRUE, check_seasonalities = TRUE,
check_regressors = TRUE
) {
if (grepl("_delim_", name)) {
stop('Holiday name cannot contain "_delim_"')
}
reserved_names = c(
'trend', 'seasonal', 'seasonalities', 'daily', 'weekly', 'yearly',
'holidays', 'zeros', 'extra_regressors', 'yhat'
)
rn_l = paste(reserved_names,"_lower",sep="")
rn_u = paste(reserved_names,"_upper",sep="")
reserved_names = c(reserved_names, rn_l, rn_u,
c("ds", "y", "cap", "floor", "y_scaled", "cap_scaled"))
if(name %in% reserved_names){
stop("Name ", name, " is reserved.")
}
if(check_holidays & !is.null(m$holidays) &
(name %in% unique(m$holidays$holiday))){
stop("Name ", name, " already used for a holiday.")
}
if(check_seasonalities & (!is.null(m$seasonalities[[name]]))){
stop("Name ", name, " already used for a seasonality.")
}
if(check_regressors & (!is.null(m$seasonalities[[name]]))){
stop("Name ", name, " already used for an added regressor.")
}
}
#' Load compiled Stan model
#'
#' @param model String 'linear' or 'logistic' to specify a linear or logistic
#' trend.
#'
#' @return Stan model.
#'
#' @keywords internal
get_prophet_stan_model <- function(model) {
fn <- paste('prophet', model, 'growth.RData', sep = '_')
## If the cached model doesn't work, just compile a new one.
tryCatch({
binary <- system.file('libs', Sys.getenv('R_ARCH'), fn,
package = 'prophet',
mustWork = TRUE)
load(binary)
obj.name <- paste(model, 'growth.stanm', sep = '.')
stanm <- eval(parse(text = obj.name))
## Should cause an error if the model doesn't work.
stanm@mk_cppmodule(stanm)
stanm
}, error = function(cond) {
compile_stan_model(model)
})
}
#' Compile Stan model
#'
#' @param model String 'linear' or 'logistic' to specify a linear or logistic
#' trend.
#'
#' @return Stan model.
#'
#' @keywords internal
compile_stan_model <- function(model) {
fn <- paste('stan/prophet', model, 'growth.stan', sep = '_')
stan.src <- system.file(fn, package = 'prophet', mustWork = TRUE)
stanc <- rstan::stanc(stan.src)
model.name <- paste(model, 'growth', sep = '_')
return(rstan::stan_model(stanc_ret = stanc, model_name = model.name))
}
#' Convert date vector
#'
#' Convert the date to POSIXct object
#'
#' @param ds Date vector, can be consisted of characters
#' @param tz string time zone
#'
#' @return vector of POSIXct object converted from date
#'
#' @keywords internal
set_date <- function(ds = NULL, tz = "GMT") {
if (length(ds) == 0) {
return(NULL)
}
if (is.factor(ds)) {
ds <- as.character(ds)
}
if (min(nchar(ds)) < 12) {
ds <- as.POSIXct(ds, format = "%Y-%m-%d", tz = tz)
} else {
ds <- as.POSIXct(ds, format = "%Y-%m-%d %H:%M:%S", tz = tz)
}
attr(ds, "tzone") <- tz
return(ds)
}
#' Time difference between datetimes
#'
#' Compute time difference of two POSIXct objects
#'
#' @param ds1 POSIXct object
#' @param ds2 POSIXct object
#' @param units string units of difference, e.g. 'days' or 'secs'.
#'
#' @return numeric time difference
#'
#' @keywords internal
time_diff <- function(ds1, ds2, units = "days") {
return(as.numeric(difftime(ds1, ds2, units = units)))
}
#' Prepare dataframe for fitting or predicting.
#'
#' Adds a time index and scales y. Creates auxillary columns 't', 't_ix',
#' 'y_scaled', and 'cap_scaled'. These columns are used during both fitting
#' and predicting.
#'
#' @param m Prophet object.
#' @param df Data frame with columns ds, y, and cap if logistic growth. Any
#' specified additional regressors must also be present.
#' @param initialize_scales Boolean set scaling factors in m from df.
#'
#' @return list with items 'df' and 'm'.
#'
#' @keywords internal
setup_dataframe <- function(m, df, initialize_scales = FALSE) {
if (exists('y', where=df)) {
df$y <- as.numeric(df$y)
}
if (any(is.infinite(df$y))) {
stop("Found infinity in column y.")
}
df$ds <- set_date(df$ds)
if (anyNA(df$ds)) {
stop(paste('Unable to parse date format in column ds. Convert to date ',
'format. Either %Y-%m-%d or %Y-%m-%d %H:%M:%S'))
}
for (name in names(m$extra_regressors)) {
if (!(name %in% colnames(df))) {
stop('Regressor "', name, '" missing from dataframe')
}
}
df <- df %>%
dplyr::arrange(ds)
m <- initialize_scales_fn(m, initialize_scales, df)
if (m$logistic.floor) {
if (!('floor' %in% colnames(df))) {
stop("Expected column 'floor'.")
}
} else {
df$floor <- 0
}
if (m$growth == 'logistic') {
if (!(exists('cap', where=df))) {
stop('Capacities must be supplied for logistic growth.')
}
df <- df %>%
dplyr::mutate(cap_scaled = (cap - floor) / m$y.scale)
}
df$t <- time_diff(df$ds, m$start, "secs") / m$t.scale
if (exists('y', where=df)) {
df$y_scaled <- (df$y - df$floor) / m$y.scale
}
for (name in names(m$extra_regressors)) {
df[[name]] <- as.numeric(df[[name]])
props <- m$extra_regressors[[name]]
df[[name]] <- (df[[name]] - props$mu) / props$std
if (anyNA(df[[name]])) {
stop('Found NaN in column ', name)
}
}
return(list("m" = m, "df" = df))
}
#' Initialize model scales.
#'
#' Sets model scaling factors using df.
#'
#' @param m Prophet object.
#' @param initialize_scales Boolean set the scales or not.
#' @param df Dataframe for setting scales.
#'
#' @return Prophet object with scales set.
#'
#' @keywords internal
initialize_scales_fn <- function(m, initialize_scales, df) {
if (!initialize_scales) {
return(m)
}
if ((m$growth == 'logistic') && ('floor' %in% colnames(df))) {
m$logistic.floor <- TRUE
floor <- df$floor
} else {
floor <- 0
}
m$y.scale <- max(abs(df$y - floor))
if (m$y.scale == 0) {
m$y.scale <- 1
}
m$start <- min(df$ds)
m$t.scale <- time_diff(max(df$ds), m$start, "secs")
for (name in names(m$extra_regressors)) {
standardize <- m$extra_regressors[[name]]$standardize
if (standardize == 'auto') {
if (all(sort(unique(df[[name]])) == c(0, 1))) {
# Don't standardize binary variables
standardize <- FALSE
} else {
standardize <- TRUE
}
}
if (standardize) {
mu <- mean(df[[name]])
std <- stats::sd(df[[name]])
if (std == 0) {
std <- mu
}
m$extra_regressors[[name]]$mu <- mu
m$extra_regressors[[name]]$std <- std
}
}
return(m)
}
#' Set changepoints
#'
#' Sets m$changepoints to the dates of changepoints. Either:
#' 1) The changepoints were passed in explicitly.
#' A) They are empty.
#' B) They are not empty, and need validation.
#' 2) We are generating a grid of them.
#' 3) The user prefers no changepoints be used.
#'
#' @param m Prophet object.
#'
#' @return m with changepoints set.
#'
#' @keywords internal
set_changepoints <- function(m) {
if (!is.null(m$changepoints)) {
if (length(m$changepoints) > 0) {
m$changepoints <- set_date(m$changepoints)
if (min(m$changepoints) < min(m$history$ds)
|| max(m$changepoints) > max(m$history$ds)) {
stop('Changepoints must fall within training data.')
}
}
} else {
# Place potential changepoints evenly through the first 80 pcnt of
# the history.
hist.size <- floor(nrow(m$history) * .8)
if (m$n.changepoints + 1 > hist.size) {
m$n.changepoints <- hist.size - 1
message('n.changepoints greater than number of observations. Using ',
m$n.changepoints)
}
if (m$n.changepoints > 0) {
cp.indexes <- round(seq.int(1, hist.size,
length.out = (m$n.changepoints + 1))[-1])
m$changepoints <- m$history$ds[cp.indexes]
} else {
m$changepoints <- c()
}
}
if (length(m$changepoints) > 0) {
m$changepoints.t <- sort(
time_diff(m$changepoints, m$start, "secs")) / m$t.scale
} else {
m$changepoints.t <- c(0) # dummy changepoint
}
return(m)
}
#' Gets changepoint matrix for history dataframe.
#'
#' @param m Prophet object.
#'
#' @return array of indexes.
#'
#' @keywords internal
get_changepoint_matrix <- function(m) {
A <- matrix(0, nrow(m$history), length(m$changepoints.t))
for (i in 1:length(m$changepoints.t)) {
A[m$history$t >= m$changepoints.t[i], i] <- 1
}
return(A)
}
#' Provides Fourier series components with the specified frequency and order.
#'
#' @param dates Vector of dates.
#' @param period Number of days of the period.
#' @param series.order Number of components.
#'
#' @return Matrix with seasonality features.
#'
#' @keywords internal
fourier_series <- function(dates, period, series.order) {
t <- time_diff(dates, set_date('1970-01-01 00:00:00'))
features <- matrix(0, length(t), 2 * series.order)
for (i in 1:series.order) {
x <- as.numeric(2 * i * pi * t / period)
features[, i * 2 - 1] <- sin(x)
features[, i * 2] <- cos(x)
}
return(features)
}
#' Data frame with seasonality features.
#'
#' @param dates Vector of dates.
#' @param period Number of days of the period.
#' @param series.order Number of components.
#' @param prefix Column name prefix.
#'
#' @return Dataframe with seasonality.
#'
#' @keywords internal
make_seasonality_features <- function(dates, period, series.order, prefix) {
features <- fourier_series(dates, period, series.order)
colnames(features) <- paste(prefix, 1:ncol(features), sep = '_delim_')
return(data.frame(features))
}
#' Construct a matrix of holiday features.
#'
#' @param m Prophet object.
#' @param dates Vector with dates used for computing seasonality.
#'
#' @return A list with entries
#' holiday.features: dataframe with a column for each holiday.
#' prior.scales: array of prior scales for each holiday column.
#'
#' @importFrom dplyr "%>%"
#' @keywords internal
make_holiday_features <- function(m, dates) {
# Strip dates to be just days, for joining on holidays
dates <- set_date(format(dates, "%Y-%m-%d"))
wide <- m$holidays %>%
dplyr::mutate(ds = set_date(ds)) %>%
dplyr::group_by(holiday, ds) %>%
dplyr::filter(row_number() == 1) %>%
dplyr::do({
if (exists('lower_window', where = .) && !is.na(.$lower_window)
&& !is.na(.$upper_window)) {
offsets <- seq(.$lower_window, .$upper_window)
} else {
offsets <- c(0)
}
names <- paste(.$holiday, '_delim_', ifelse(offsets < 0, '-', '+'),
abs(offsets), sep = '')
dplyr::data_frame(ds = .$ds + offsets * 24 * 3600, holiday = names)
}) %>%
dplyr::mutate(x = 1.) %>%
tidyr::spread(holiday, x, fill = 0)
holiday.features <- data.frame(ds = set_date(dates)) %>%
dplyr::left_join(wide, by = 'ds') %>%
dplyr::select(-ds)
holiday.features[is.na(holiday.features)] <- 0
# Prior scales
if (!('prior_scale' %in% colnames(m$holidays))) {
m$holidays$prior_scale <- m$holidays.prior.scale
}
prior.scales.list <- list()
for (name in unique(m$holidays$holiday)) {
df.h <- m$holidays[m$holidays$holiday == name, ]
ps <- unique(df.h$prior_scale)
if (length(ps) > 1) {
stop('Holiday ', name, ' does not have a consistent prior scale ',
'specification')
}
if (is.na(ps)) {
ps <- m$holidays.prior.scale
}
if (ps <= 0) {
stop('Prior scale must be > 0.')
}
prior.scales.list[[name]] <- ps
}
prior.scales <- c()
for (name in colnames(holiday.features)) {
sn <- strsplit(name, '_delim_', fixed = TRUE)[[1]][1]
prior.scales <- c(prior.scales, prior.scales.list[[sn]])
}
return(list(holiday.features = holiday.features,
prior.scales = prior.scales))
}
#' Add an additional regressor to be used for fitting and predicting.
#'
#' The dataframe passed to `fit` and `predict` will have a column with the
#' specified name to be used as a regressor. When standardize='auto', the
#' regressor will be standardized unless it is binary. The regression
#' coefficient is given a prior with the specified scale parameter.
#' Decreasing the prior scale will add additional regularization. If no
#' prior scale is provided, holidays.prior.scale will be used.
#'
#' @param m Prophet object.
#' @param name String name of the regressor
#' @param prior.scale Float scale for the normal prior. If not provided,
#' holidays.prior.scale will be used.
#' @param standardize Bool, specify whether this regressor will be standardized
#' prior to fitting. Can be 'auto' (standardize if not binary), True, or
#' False.
#'
#' @return The prophet model with the regressor added.
#'
#' @export
add_regressor <- function(m, name, prior.scale = NULL, standardize = 'auto'){
if (!is.null(m$history)) {
stop('Regressors must be added prior to model fitting.')
}
validate_column_name(m, name, check_regressors = FALSE)
if (is.null(prior.scale)) {
prior.scale <- m$holidays.prior.scale
}
if(prior.scale <= 0) {
stop("Prior scale must be > 0")
}
m$extra_regressors[[name]] <- list(
prior.scale = prior.scale,
standardize = standardize,
mu = 0,
std = 1.0
)
return(m)
}
#' Add a seasonal component with specified period, number of Fourier
#' components, and prior scale.
#'
#' Increasing the number of Fourier components allows the seasonality to change
#' more quickly (at risk of overfitting). Default values for yearly and weekly
#' seasonalities are 10 and 3 respectively.
#'
#' Increasing prior scale will allow this seasonality component more
#' flexibility, decreasing will dampen it. If not provided, will use the
#' seasonality.prior.scale provided on Prophet initialization (defaults to 10).
#'
#' @param m Prophet object.
#' @param name String name of the seasonality component.
#' @param period Float number of days in one period.
#' @param fourier.order Int number of Fourier components to use.
#' @param prior.scale Float prior scale for this component.
#'
#' @return The prophet model with the seasonality added.
#'
#' @importFrom dplyr "%>%"
#' @export
add_seasonality <- function(m, name, period, fourier.order, prior.scale = NULL) {
if (!is.null(m$history)) {
stop("Seasonality must be added prior to model fitting.")
}
if (!(name %in% c('daily', 'weekly', 'yearly'))) {
# Allow overriding built-in seasonalities
validate_column_name(m, name, check_seasonalities = FALSE)
}
if (is.null(prior.scale)) {
ps <- m$seasonality.prior.scale
} else {
ps <- prior.scale
}
if (ps <= 0) {
stop('Prior scale must be > 0')
}
m$seasonalities[[name]] <- list(
period = period,
fourier.order = fourier.order,
prior.scale = ps
)
return(m)
}
#' Dataframe with seasonality features.
#' Includes seasonality features, holiday features, and added regressors.
#'
#' @param m Prophet object.
#' @param df Dataframe with dates for computing seasonality features and any
#' added regressors.
#'
#' @return List with items
#' seasonal.features: Dataframe with regressor features,
#' prior.scales: Array of prior scales for each colum of the features
#' dataframe.
#'
#' @keywords internal
make_all_seasonality_features <- function(m, df) {
seasonal.features <- data.frame(row.names = 1:nrow(df))
prior.scales <- c()
# Seasonality features
for (name in names(m$seasonalities)) {
props <- m$seasonalities[[name]]
features <- make_seasonality_features(
df$ds, props$period, props$fourier.order, name)
seasonal.features <- cbind(seasonal.features, features)
prior.scales <- c(prior.scales,
props$prior.scale * rep(1, ncol(features)))
}
# Holiday features
if (!is.null(m$holidays)) {
hf <- make_holiday_features(m, df$ds)
seasonal.features <- cbind(seasonal.features, hf$holiday.features)
prior.scales <- c(prior.scales, hf$prior.scales)
}
# Additional regressors
for (name in names(m$extra_regressors)) {
seasonal.features[[name]] <- df[[name]]
prior.scales <- c(prior.scales, m$extra_regressors[[name]]$prior.scale)
}
if (ncol(seasonal.features) == 0) {
seasonal.features <- data.frame(zeros = rep(0, nrow(df)))
prior.scales <- c(1.)
}
return(list(seasonal.features = seasonal.features,
prior.scales = prior.scales))
}
#' Get number of Fourier components for built-in seasonalities.
#'
#' @param m Prophet object.
#' @param name String name of the seasonality component.
#' @param arg 'auto', TRUE, FALSE, or number of Fourier components as
#' provided.
#' @param auto.disable Bool if seasonality should be disabled when 'auto'.
#' @param default.order Int default Fourier order.
#'
#' @return Number of Fourier components, or 0 for disabled.
#'
#' @keywords internal
parse_seasonality_args <- function(m, name, arg, auto.disable, default.order) {
if (arg == 'auto') {
fourier.order <- 0
if (name %in% names(m$seasonalities)) {
message('Found custom seasonality named "', name,
'", disabling built-in ', name, ' seasonality.')
} else if (auto.disable) {
message('Disabling ', name, ' seasonality. Run prophet with ', name,
'.seasonality=TRUE to override this.')
} else {
fourier.order <- default.order
}
} else if (arg == TRUE) {
fourier.order <- default.order
} else if (arg == FALSE) {
fourier.order <- 0
} else {
fourier.order <- arg
}
return(fourier.order)
}
#' Set seasonalities that were left on auto.
#'
#' Turns on yearly seasonality if there is >=2 years of history.
#' Turns on weekly seasonality if there is >=2 weeks of history, and the
#' spacing between dates in the history is <7 days.
#' Turns on daily seasonality if there is >=2 days of history, and the spacing
#' between dates in the history is <1 day.
#'
#' @param m Prophet object.
#'
#' @return The prophet model with seasonalities set.
#'
#' @keywords internal
set_auto_seasonalities <- function(m) {
first <- min(m$history$ds)
last <- max(m$history$ds)
dt <- diff(time_diff(m$history$ds, m$start))
min.dt <- min(dt[dt > 0])
yearly.disable <- time_diff(last, first) < 730
fourier.order <- parse_seasonality_args(
m, 'yearly', m$yearly.seasonality, yearly.disable, 10)
if (fourier.order > 0) {
m$seasonalities[['yearly']] <- list(
period = 365.25,
fourier.order = fourier.order,
prior.scale = m$seasonality.prior.scale
)
}
weekly.disable <- ((time_diff(last, first) < 14) || (min.dt >= 7))
fourier.order <- parse_seasonality_args(
m, 'weekly', m$weekly.seasonality, weekly.disable, 3)
if (fourier.order > 0) {
m$seasonalities[['weekly']] <- list(
period = 7,
fourier.order = fourier.order,
prior.scale = m$seasonality.prior.scale
)
}
daily.disable <- ((time_diff(last, first) < 2) || (min.dt >= 1))
fourier.order <- parse_seasonality_args(
m, 'daily', m$daily.seasonality, daily.disable, 4)
if (fourier.order > 0) {
m$seasonalities[['daily']] <- list(
period = 1,
fourier.order = fourier.order,
prior.scale = m$seasonality.prior.scale
)
}
return(m)
}
#' Initialize linear growth.
#'
#' Provides a strong initialization for linear growth by calculating the
#' growth and offset parameters that pass the function through the first and
#' last points in the time series.
#'
#' @param df Data frame with columns ds (date), y_scaled (scaled time series),
#' and t (scaled time).
#'
#' @return A vector (k, m) with the rate (k) and offset (m) of the linear
#' growth function.
#'
#' @keywords internal
linear_growth_init <- function(df) {
i0 <- which.min(df$ds)
i1 <- which.max(df$ds)
T <- df$t[i1] - df$t[i0]
# Initialize the rate
k <- (df$y_scaled[i1] - df$y_scaled[i0]) / T
# And the offset
m <- df$y_scaled[i0] - k * df$t[i0]
return(c(k, m))
}
#' Initialize logistic growth.
#'
#' Provides a strong initialization for logistic growth by calculating the
#' growth and offset parameters that pass the function through the first and
#' last points in the time series.
#'
#' @param df Data frame with columns ds (date), cap_scaled (scaled capacity),
#' y_scaled (scaled time series), and t (scaled time).
#'
#' @return A vector (k, m) with the rate (k) and offset (m) of the logistic
#' growth function.
#'
#' @keywords internal
logistic_growth_init <- function(df) {
i0 <- which.min(df$ds)
i1 <- which.max(df$ds)
T <- df$t[i1] - df$t[i0]
# Force valid values, in case y > cap or y < 0
C0 <- df$cap_scaled[i0]
C1 <- df$cap_scaled[i1]
y0 <- max(0.01 * C0, min(0.99 * C0, df$y_scaled[i0]))
y1 <- max(0.01 * C1, min(0.99 * C1, df$y_scaled[i1]))
r0 <- C0 / y0
r1 <- C1 / y1
if (abs(r0 - r1) <= 0.01) {
r0 <- 1.05 * r0
}
L0 <- log(r0 - 1)
L1 <- log(r1 - 1)
# Initialize the offset
m <- L0 * T / (L0 - L1)
# And the rate
k <- (L0 - L1) / T
return(c(k, m))
}
#' Fit the prophet model.
#'
#' This sets m$params to contain the fitted model parameters. It is a list
#' with the following elements:
#' k (M array): M posterior samples of the initial slope.
#' m (M array): The initial intercept.
#' delta (MxN matrix): The slope change at each of N changepoints.
#' beta (MxK matrix): Coefficients for K seasonality features.
#' sigma_obs (M array): Noise level.
#' Note that M=1 if MAP estimation.
#'
#' @param m Prophet object.
#' @param df Data frame.
#' @param ... Additional arguments passed to the \code{optimizing} or
#' \code{sampling} functions in Stan.
#'
#' @export
fit.prophet <- function(m, df, ...) {
if (!is.null(m$history)) {
stop("Prophet object can only be fit once. Instantiate a new object.")
}
history <- df %>%
dplyr::filter(!is.na(y))
if (nrow(history) < 2) {
stop("Dataframe has less than 2 non-NA rows.")
}
m$history.dates <- sort(set_date(df$ds))
out <- setup_dataframe(m, history, initialize_scales = TRUE)
history <- out$df
m <- out$m
m$history <- history
m <- set_auto_seasonalities(m)
out2 <- make_all_seasonality_features(m, history)
seasonal.features <- out2$seasonal.features
prior.scales <- out2$prior.scales
m <- set_changepoints(m)
A <- get_changepoint_matrix(m)
# Construct input to stan
dat <- list(
T = nrow(history),
K = ncol(seasonal.features),
S = length(m$changepoints.t),
y = history$y_scaled,
t = history$t,
A = A,
t_change = array(m$changepoints.t),
X = as.matrix(seasonal.features),
sigmas = array(prior.scales),
tau = m$changepoint.prior.scale
)
# Run stan
if (m$growth == 'linear') {
kinit <- linear_growth_init(history)
} else {
dat$cap <- history$cap_scaled # Add capacities to the Stan data
kinit <- logistic_growth_init(history)
}
if (exists(".prophet.stan.models")) {
model <- .prophet.stan.models[[m$growth]]
} else {
model <- get_prophet_stan_model(m$growth)
}
stan_init <- function() {
list(k = kinit[1],
m = kinit[2],
delta = array(rep(0, length(m$changepoints.t))),
beta = array(rep(0, ncol(seasonal.features))),
sigma_obs = 1
)
}
if (min(history$y) == max(history$y)) {
# Nothing to fit.
m$params <- stan_init()
m$params$sigma_obs <- 0.
n.iteration <- 1.
} else if (m$mcmc.samples > 0) {
stan.fit <- rstan::sampling(
model,
data = dat,
init = stan_init,
iter = m$mcmc.samples,
...
)
m$params <- rstan::extract(stan.fit)
n.iteration <- length(m$params$k)
} else {
stan.fit <- rstan::optimizing(
model,
data = dat,
init = stan_init,
iter = 1e4,
as_vector = FALSE,
...
)
m$params <- stan.fit$par
n.iteration <- 1
}
# Cast the parameters to have consistent form, whether full bayes or MAP
for (name in c('delta', 'beta')){
m$params[[name]] <- matrix(m$params[[name]], nrow = n.iteration)
}
# rstan::sampling returns 1d arrays; converts to atomic vectors.
for (name in c('k', 'm', 'sigma_obs')){
m$params[[name]] <- c(m$params[[name]])
}
# If no changepoints were requested, replace delta with 0s
if (m$n.changepoints == 0) {
# Fold delta into the base rate k
m$params$k <- m$params$k + m$params$delta[, 1]
m$params$delta <- matrix(rep(0, length(m$params$delta)), nrow = n.iteration)
}
return(m)
}
#' Predict using the prophet model.
#'
#' @param object Prophet object.
#' @param df Dataframe with dates for predictions (column ds), and capacity
#' (column cap) if logistic growth. If not provided, predictions are made on
#' the history.
#' @param ... additional arguments.
#'
#' @return A dataframe with the forecast components.
#'
#' @examples
#' \dontrun{
#' history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
#' y = sin(1:366/200) + rnorm(366)/10)
#' m <- prophet(history)
#' future <- make_future_dataframe(m, periods = 365)
#' forecast <- predict(m, future)
#' plot(m, forecast)
#' }
#'
#' @export
predict.prophet <- function(object, df = NULL, ...) {
if (is.null(df)) {
df <- object$history
} else {
if (nrow(df) == 0) {
stop("Dataframe has no rows.")
}
out <- setup_dataframe(object, df)
df <- out$df
}
df$trend <- predict_trend(object, df)
seasonal.components <- predict_seasonal_components(object, df)
intervals <- predict_uncertainty(object, df)
# Drop columns except ds, cap, floor, and trend
cols <- c('ds', 'trend')
if ('cap' %in% colnames(df)) {
cols <- c(cols, 'cap')
}
if (object$logistic.floor) {
cols <- c(cols, 'floor')
}
df <- df[cols]
df <- df %>%
dplyr::bind_cols(seasonal.components) %>%
dplyr::bind_cols(intervals)
df$yhat <- df$trend + df$seasonal
return(df)
}
#' Evaluate the piecewise linear function.
#'
#' @param t Vector of times on which the function is evaluated.
#' @param deltas Vector of rate changes at each changepoint.
#' @param k Float initial rate.
#' @param m Float initial offset.
#' @param changepoint.ts Vector of changepoint times.
#'
#' @return Vector y(t).
#'
#' @keywords internal
piecewise_linear <- function(t, deltas, k, m, changepoint.ts) {
# Intercept changes
gammas <- -changepoint.ts * deltas
# Get cumulative slope and intercept at each t
k_t <- rep(k, length(t))
m_t <- rep(m, length(t))
for (s in 1:length(changepoint.ts)) {
indx <- t >= changepoint.ts[s]
k_t[indx] <- k_t[indx] + deltas[s]
m_t[indx] <- m_t[indx] + gammas[s]
}
y <- k_t * t + m_t
return(y)
}
#' Evaluate the piecewise logistic function.
#'
#' @param t Vector of times on which the function is evaluated.
#' @param cap Vector of capacities at each t.
#' @param deltas Vector of rate changes at each changepoint.
#' @param k Float initial rate.
#' @param m Float initial offset.
#' @param changepoint.ts Vector of changepoint times.
#'
#' @return Vector y(t).
#'
#' @keywords internal
piecewise_logistic <- function(t, cap, deltas, k, m, changepoint.ts) {
# Compute offset changes
k.cum <- c(k, cumsum(deltas) + k)
gammas <- rep(0, length(changepoint.ts))
for (i in 1:length(changepoint.ts)) {
gammas[i] <- ((changepoint.ts[i] - m - sum(gammas))
* (1 - k.cum[i] / k.cum[i + 1]))
}
# Get cumulative rate and offset at each t
k_t <- rep(k, length(t))
m_t <- rep(m, length(t))
for (s in 1:length(changepoint.ts)) {
indx <- t >= changepoint.ts[s]
k_t[indx] <- k_t[indx] + deltas[s]
m_t[indx] <- m_t[indx] + gammas[s]
}
y <- cap / (1 + exp(-k_t * (t - m_t)))
return(y)
}
#' Predict trend using the prophet model.
#'
#' @param model Prophet object.
#' @param df Prediction dataframe.
#'
#' @return Vector with trend on prediction dates.
#'
#' @keywords internal
predict_trend <- function(model, df) {
k <- mean(model$params$k, na.rm = TRUE)
param.m <- mean(model$params$m, na.rm = TRUE)
deltas <- colMeans(model$params$delta, na.rm = TRUE)
t <- df$t
if (model$growth == 'linear') {
trend <- piecewise_linear(t, deltas, k, param.m, model$changepoints.t)
} else {
cap <- df$cap_scaled
trend <- piecewise_logistic(
t, cap, deltas, k, param.m, model$changepoints.t)
}
return(trend * model$y.scale + df$floor)
}
#' Predict seasonality components, holidays, and added regressors.
#'
#' @param m Prophet object.
#' @param df Prediction dataframe.
#'
#' @return Dataframe with seasonal components.
#'
#' @keywords internal
predict_seasonal_components <- function(m, df) {
seasonal.features <- make_all_seasonality_features(m, df)$seasonal.features
lower.p <- (1 - m$interval.width)/2
upper.p <- (1 + m$interval.width)/2
components <- dplyr::data_frame(component = colnames(seasonal.features)) %>%
dplyr::mutate(col = 1:n()) %>%
tidyr::separate(component, c('component', 'part'), sep = "_delim_",
extra = "merge", fill = "right") %>%
dplyr::select(col, component)
# Add total for all regression components
components <- rbind(
components,
data.frame(col = 1:ncol(seasonal.features), component = 'seasonal'))
# Add totals for seasonality, holiday, and extra regressors
components <- add_group_component(
components, 'seasonalities', names(m$seasonalities))
if(!is.null(m$holidays)){
components <- add_group_component(
components, 'holidays', unique(m$holidays$holiday))
}
components <- add_group_component(
components, 'extra_regressors', names(m$extra_regressors))
# Remove the placeholder
components <- dplyr::filter(components, component != 'zeros')
component.predictions <- components %>%
dplyr::group_by(component) %>% dplyr::do({
comp <- (as.matrix(seasonal.features[, .$col])
%*% t(m$params$beta[, .$col, drop = FALSE])) * m$y.scale
dplyr::data_frame(ix = 1:nrow(seasonal.features),
mean = rowMeans(comp, na.rm = TRUE),
lower = apply(comp, 1, stats::quantile, lower.p,
na.rm = TRUE),
upper = apply(comp, 1, stats::quantile, upper.p,
na.rm = TRUE))
}) %>%
tidyr::gather(stat, value, mean, lower, upper) %>%
dplyr::mutate(stat = ifelse(stat == 'mean', '', paste0('_', stat))) %>%
tidyr::unite(component, component, stat, sep="") %>%
tidyr::spread(component, value) %>%
dplyr::select(-ix)
return(component.predictions)
}
#' Adds a component with given name that contains all of the components
#' in group.
#'
#' @param components Dataframe with components.
#' @param name Name of new group component.
#' @param group List of components that form the group.
#'
#' @return Dataframe with components.
#'
#' @keywords internal
add_group_component <- function(components, name, group) {
new_comp <- components[(components$component %in% group), ]
if (nrow(new_comp) > 0) {
new_comp$component <- name
components <- rbind(components, new_comp)
}
return(components)
}
#' Prophet posterior predictive samples.
#'
#' @param m Prophet object.
#' @param df Prediction dataframe.
#'
#' @return List with posterior predictive samples for each component.
#'
#' @keywords internal
sample_posterior_predictive <- function(m, df) {
# Sample trend, seasonality, and yhat from the extrapolation model.
n.iterations <- length(m$params$k)
samp.per.iter <- max(1, ceiling(m$uncertainty.samples / n.iterations))
nsamp <- n.iterations * samp.per.iter # The actual number of samples
seasonal.features <- make_all_seasonality_features(m, df)$seasonal.features
sim.values <- list("trend" = matrix(, nrow = nrow(df), ncol = nsamp),
"seasonal" = matrix(, nrow = nrow(df), ncol = nsamp),
"yhat" = matrix(, nrow = nrow(df), ncol = nsamp))
for (i in 1:n.iterations) {
# For each set of parameters from MCMC (or just 1 set for MAP),
for (j in 1:samp.per.iter) {
# Do a simulation with this set of parameters,
sim <- sample_model(m, df, seasonal.features, i)
# Store the results
for (key in c("trend", "seasonal", "yhat")) {
sim.values[[key]][,(i - 1) * samp.per.iter + j] <- sim[[key]]
}
}
}
return(sim.values)
}
#' Sample from the posterior predictive distribution.
#'
#' @param m Prophet object.
#' @param df Dataframe with dates for predictions (column ds), and capacity
#' (column cap) if logistic growth.
#'
#' @return A list with items "trend", "seasonal", and "yhat" containing
#' posterior predictive samples for that component. "seasonal" is the sum
#' of seasonalities, holidays, and added regressors.
#'
#' @export
predictive_samples <- function(m, df) {
df <- setup_dataframe(m, df)$df
sim.values <- sample_posterior_predictive(m, df)
return(sim.values)
}
#' Prophet uncertainty intervals for yhat and trend
#'
#' @param m Prophet object.
#' @param df Prediction dataframe.
#'
#' @return Dataframe with uncertainty intervals.
#'
#' @keywords internal
predict_uncertainty <- function(m, df) {
sim.values <- sample_posterior_predictive(m, df)
# Add uncertainty estimates
lower.p <- (1 - m$interval.width)/2
upper.p <- (1 + m$interval.width)/2
intervals <- cbind(
t(apply(t(sim.values$yhat), 2, stats::quantile, c(lower.p, upper.p),
na.rm = TRUE)),
t(apply(t(sim.values$trend), 2, stats::quantile, c(lower.p, upper.p),
na.rm = TRUE))
) %>% dplyr::as_data_frame()
colnames(intervals) <- paste(rep(c('yhat', 'trend'), each=2),
c('lower', 'upper'), sep = "_")
return(intervals)
}
#' Simulate observations from the extrapolated generative model.
#'
#' @param m Prophet object.
#' @param df Prediction dataframe.
#' @param seasonal.features Data frame of seasonal features
#' @param iteration Int sampling iteration to use parameters from.
#'
#' @return List of trend, seasonality, and yhat, each a vector like df$t.
#'
#' @keywords internal
sample_model <- function(m, df, seasonal.features, iteration) {
trend <- sample_predictive_trend(m, df, iteration)
beta <- m$params$beta[iteration,]
seasonal <- (as.matrix(seasonal.features) %*% beta) * m$y.scale
sigma <- m$params$sigma_obs[iteration]
noise <- stats::rnorm(nrow(df), mean = 0, sd = sigma) * m$y.scale
return(list("yhat" = trend + seasonal + noise,
"trend" = trend,
"seasonal" = seasonal))
}
#' Simulate the trend using the extrapolated generative model.
#'
#' @param model Prophet object.
#' @param df Prediction dataframe.
#' @param iteration Int sampling iteration to use parameters from.
#'
#' @return Vector of simulated trend over df$t.
#'
#' @keywords internal
sample_predictive_trend <- function(model, df, iteration) {
k <- model$params$k[iteration]
param.m <- model$params$m[iteration]
deltas <- model$params$delta[iteration,]
t <- df$t
T <- max(t)
if (T > 1) {
# Get the time discretization of the history
dt <- diff(model$history$t)
dt <- min(dt[dt > 0])
# Number of time periods in the future
N <- ceiling((T - 1) / dt)
S <- length(model$changepoints.t)
# The history had S split points, over t = [0, 1].
# The forecast is on [1, T], and should have the same average frequency of
# rate changes. Thus for N time periods in the future, we want an average
# of S * (T - 1) changepoints in expectation.
prob.change <- min(1, (S * (T - 1)) / N)
# This calculation works for both history and df not uniformly spaced.
n.changes <- stats::rbinom(1, N, prob.change)
# Sample ts
if (n.changes == 0) {
changepoint.ts.new <- c()
} else {
changepoint.ts.new <- sort(stats::runif(n.changes, min = 1, max = T))
}
} else {
changepoint.ts.new <- c()
n.changes <- 0
}
# Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
lambda <- mean(abs(c(deltas))) + 1e-8
# Sample deltas
deltas.new <- extraDistr::rlaplace(n.changes, mu = 0, sigma = lambda)
# Combine with changepoints from the history
changepoint.ts <- c(model$changepoints.t, changepoint.ts.new)
deltas <- c(deltas, deltas.new)
# Get the corresponding trend
if (model$growth == 'linear') {
trend <- piecewise_linear(t, deltas, k, param.m, changepoint.ts)
} else {
cap <- df$cap_scaled
trend <- piecewise_logistic(t, cap, deltas, k, param.m, changepoint.ts)
}
return(trend * model$y.scale + df$floor)
}
#' Make dataframe with future dates for forecasting.
#'
#' @param m Prophet model object.
#' @param periods Int number of periods to forecast forward.
#' @param freq 'day', 'week', 'month', 'quarter', 'year', 1(1 sec), 60(1 minute) or 3600(1 hour).
#' @param include_history Boolean to include the historical dates in the data
#' frame for predictions.
#'
#' @return Dataframe that extends forward from the end of m$history for the
#' requested number of periods.
#'
#' @export
make_future_dataframe <- function(m, periods, freq = 'day',
include_history = TRUE) {
# For backwards compatability with previous zoo date type,
if (freq == 'm') {
freq <- 'month'
}
dates <- seq(max(m$history.dates), length.out = periods + 1, by = freq)
dates <- dates[2:(periods + 1)] # Drop the first, which is max(history$ds)
if (include_history) {
dates <- c(m$history.dates, dates)
attr(dates, "tzone") <- "GMT"
}
return(data.frame(ds = dates))
}
#' Merge history and forecast for plotting.
#'
#' @param m Prophet object.
#' @param fcst Data frame returned by prophet predict.
#'
#' @importFrom dplyr "%>%"
#' @keywords internal
df_for_plotting <- function(m, fcst) {
# Make sure there is no y in fcst
fcst$y <- NULL
df <- m$history %>%
dplyr::select(ds, y) %>%
dplyr::full_join(fcst, by = "ds") %>%
dplyr::arrange(ds)
return(df)
}
#' Plot the prophet forecast.
#'
#' @param x Prophet object.
#' @param fcst Data frame returned by predict(m, df).
#' @param uncertainty Boolean indicating if the uncertainty interval for yhat
#' should be plotted. Must be present in fcst as yhat_lower and yhat_upper.
#' @param plot_cap Boolean indicating if the capacity should be shown in the
#' figure, if available.
#' @param xlabel Optional label for x-axis
#' @param ylabel Optional label for y-axis
#' @param ... additional arguments
#'
#' @return A ggplot2 plot.
#'
#' @examples
#' \dontrun{
#' history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
#' y = sin(1:366/200) + rnorm(366)/10)
#' m <- prophet(history)
#' future <- make_future_dataframe(m, periods = 365)
#' forecast <- predict(m, future)
#' plot(m, forecast)
#' }
#'
#' @export
plot.prophet <- function(x, fcst, uncertainty = TRUE, plot_cap = TRUE,
xlabel = 'ds', ylabel = 'y', ...) {
df <- df_for_plotting(x, fcst)
gg <- ggplot2::ggplot(df, ggplot2::aes(x = ds, y = y)) +
ggplot2::labs(x = xlabel, y = ylabel)
if (exists('cap', where = df) && plot_cap) {
gg <- gg + ggplot2::geom_line(
ggplot2::aes(y = cap), linetype = 'dashed', na.rm = TRUE)
}
if (x$logistic.floor && exists('floor', where = df) && plot_cap) {
gg <- gg + ggplot2::geom_line(
ggplot2::aes(y = floor), linetype = 'dashed', na.rm = TRUE)
}
if (uncertainty && exists('yhat_lower', where = df)) {
gg <- gg +
ggplot2::geom_ribbon(ggplot2::aes(ymin = yhat_lower, ymax = yhat_upper),
alpha = 0.2,
fill = "#0072B2",
na.rm = TRUE)
}
gg <- gg +
ggplot2::geom_point(na.rm=TRUE) +
ggplot2::geom_line(ggplot2::aes(y = yhat), color = "#0072B2",
na.rm = TRUE) +
ggplot2::theme(aspect.ratio = 3 / 5)
return(gg)
}
#' Plot the components of a prophet forecast.
#' Prints a ggplot2 with panels for trend, weekly and yearly seasonalities if
#' present, and holidays if present.
#'
#' @param m Prophet object.
#' @param fcst Data frame returned by predict(m, df).
#' @param uncertainty Boolean indicating if the uncertainty interval should be
#' plotted for the trend, from fcst columns trend_lower and trend_upper.
#' @param plot_cap Boolean indicating if the capacity should be shown in the
#' figure, if available.
#' @param weekly_start Integer specifying the start day of the weekly
#' seasonality plot. 0 (default) starts the week on Sunday. 1 shifts by 1 day
#' to Monday, and so on.
#' @param yearly_start Integer specifying the start day of the yearly
#' seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts by 1 day
#' to Jan 2, and so on.
#'
#' @return Invisibly return a list containing the plotted ggplot objects
#'
#' @export
#' @importFrom dplyr "%>%"
prophet_plot_components <- function(
m, fcst, uncertainty = TRUE, plot_cap = TRUE, weekly_start = 0,
yearly_start = 0
) {
# Plot the trend
panels <- list(plot_forecast_component(fcst, 'trend', uncertainty, plot_cap))
# Plot holiday components, if present.
if (!is.null(m$holidays) & ('holidays' %in% colnames(fcst))) {
panels[[length(panels) + 1]] <- plot_forecast_component(
fcst, 'holidays', uncertainty, FALSE)
}
# Plot weekly seasonality, if present
if ("weekly" %in% colnames(fcst)) {
panels[[length(panels) + 1]] <- plot_weekly(m, uncertainty, weekly_start)
}
# Plot yearly seasonality, if present
if ("yearly" %in% colnames(fcst)) {
panels[[length(panels) + 1]] <- plot_yearly(m, uncertainty, yearly_start)
}
# Plot other seasonalities
for (name in names(m$seasonalities)) {
if (!(name %in% c('weekly', 'yearly')) &&
(name %in% colnames(fcst))) {
panels[[length(panels) + 1]] <- plot_seasonality(m, name, uncertainty)
}
}
# Plot extra regressors
if ((length(m$extra_regressors) > 0)
& ('extra_regressors' %in% colnames(fcst))) {
panels[[length(panels) + 1]] <- plot_forecast_component(
fcst, 'extra_regressors', uncertainty, FALSE)
}
# Make the plot.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(length(panels),
1)))
for (i in 1:length(panels)) {
print(panels[[i]], vp = grid::viewport(layout.pos.row = i,
layout.pos.col = 1))
}
return(invisible(panels))
}
#' Plot a particular component of the forecast.
#'
#' @param fcst Dataframe output of `predict`.
#' @param name String name of the component to plot (column of fcst).
#' @param uncertainty Boolean to plot uncertainty intervals.
#' @param plot_cap Boolean indicating if the capacity should be shown in the
#' figure, if available.
#'
#' @return A ggplot2 plot.
#'
#' @export
plot_forecast_component <- function(
fcst, name, uncertainty = TRUE, plot_cap = FALSE
) {
gg.comp <- ggplot2::ggplot(
fcst, ggplot2::aes_string(x = 'ds', y = name, group = 1)) +
ggplot2::geom_line(color = "#0072B2", na.rm = TRUE)
if (exists('cap', where = fcst) && plot_cap) {
gg.comp <- gg.comp + ggplot2::geom_line(
ggplot2::aes(y = cap), linetype = 'dashed', na.rm = TRUE)
}
if (exists('floor', where = fcst) && plot_cap) {
gg.comp <- gg.comp + ggplot2::geom_line(
ggplot2::aes(y = floor), linetype = 'dashed', na.rm = TRUE)
}
if (uncertainty) {
gg.comp <- gg.comp +
ggplot2::geom_ribbon(
ggplot2::aes_string(
ymin = paste0(name, '_lower'), ymax = paste0(name, '_upper')
),
alpha = 0.2,
fill = "#0072B2",
na.rm = TRUE)
}
return(gg.comp)
}
#' Prepare dataframe for plotting seasonal components.
#'
#' @param m Prophet object.
#' @param ds Array of dates for column ds.
#'
#' @return A dataframe with seasonal components on ds.
#'
#' @keywords internal
seasonality_plot_df <- function(m, ds) {
df_list <- list(ds = ds, cap = 1)
for (name in names(m$extra_regressors)) {
df_list[[name]] <- 0
}
df <- as.data.frame(df_list)
df <- setup_dataframe(m, df)$df
return(df)
}
#' Plot the weekly component of the forecast.
#'
#' @param m Prophet model object
#' @param uncertainty Boolean to plot uncertainty intervals.
#' @param weekly_start Integer specifying the start day of the weekly
#' seasonality plot. 0 (default) starts the week on Sunday. 1 shifts by 1 day
#' to Monday, and so on.
#'
#' @return A ggplot2 plot.
#'
#' @keywords internal
plot_weekly <- function(m, uncertainty = TRUE, weekly_start = 0) {
# Compute weekly seasonality for a Sun-Sat sequence of dates.
days <- seq(set_date('2017-01-01'), by='d', length.out=7) + weekly_start
df.w <- seasonality_plot_df(m, days)
seas <- predict_seasonal_components(m, df.w)
seas$dow <- factor(weekdays(df.w$ds), levels=weekdays(df.w$ds))
gg.weekly <- ggplot2::ggplot(seas, ggplot2::aes(x = dow, y = weekly,
group = 1)) +
ggplot2::geom_line(color = "#0072B2", na.rm = TRUE) +
ggplot2::labs(x = "Day of week")
if (uncertainty) {
gg.weekly <- gg.weekly +
ggplot2::geom_ribbon(ggplot2::aes(ymin = weekly_lower,
ymax = weekly_upper),
alpha = 0.2,
fill = "#0072B2",
na.rm = TRUE)
}
return(gg.weekly)
}
#' Plot the yearly component of the forecast.
#'
#' @param m Prophet model object.
#' @param uncertainty Boolean to plot uncertainty intervals.
#' @param yearly_start Integer specifying the start day of the yearly
#' seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts by 1 day
#' to Jan 2, and so on.
#'
#' @return A ggplot2 plot.
#'
#' @keywords internal
plot_yearly <- function(m, uncertainty = TRUE, yearly_start = 0) {
# Compute yearly seasonality for a Jan 1 - Dec 31 sequence of dates.
days <- seq(set_date('2017-01-01'), by='d', length.out=365) + yearly_start
df.y <- seasonality_plot_df(m, days)
seas <- predict_seasonal_components(m, df.y)
seas$ds <- df.y$ds
gg.yearly <- ggplot2::ggplot(seas, ggplot2::aes(x = ds, y = yearly,
group = 1)) +
ggplot2::geom_line(color = "#0072B2", na.rm = TRUE) +
ggplot2::labs(x = "Day of year") +
ggplot2::scale_x_datetime(labels = scales::date_format('%B %d'))
if (uncertainty) {
gg.yearly <- gg.yearly +
ggplot2::geom_ribbon(ggplot2::aes(ymin = yearly_lower,
ymax = yearly_upper),
alpha = 0.2,
fill = "#0072B2",
na.rm = TRUE)
}
return(gg.yearly)
}
#' Plot a custom seasonal component.
#'
#' @param m Prophet model object.
#' @param name String name of the seasonality.
#' @param uncertainty Boolean to plot uncertainty intervals.
#'
#' @return A ggplot2 plot.
#'
#' @keywords internal
plot_seasonality <- function(m, name, uncertainty = TRUE) {
# Compute seasonality from Jan 1 through a single period.
start <- set_date('2017-01-01')
period <- m$seasonalities[[name]]$period
end <- start + period * 24 * 3600
plot.points <- 200
days <- seq(from=start, to=end, length.out=plot.points)
df.y <- seasonality_plot_df(m, days)
seas <- predict_seasonal_components(m, df.y)
seas$ds <- df.y$ds
gg.s <- ggplot2::ggplot(
seas, ggplot2::aes_string(x = 'ds', y = name, group = 1)) +
ggplot2::geom_line(color = "#0072B2", na.rm = TRUE)
if (period <= 2) {
fmt.str <- '%T'
} else if (period < 14) {
fmt.str <- '%m/%d %R'
} else {
fmt.str <- '%m/%d'
}
gg.s <- gg.s +
ggplot2::scale_x_datetime(labels = scales::date_format(fmt.str))
if (uncertainty) {
gg.s <- gg.s +
ggplot2::geom_ribbon(
ggplot2::aes_string(
ymin = paste0(name, '_lower'), ymax = paste0(name, '_upper')
),
alpha = 0.2,
fill = "#0072B2",
na.rm = TRUE)
}
return(gg.s)
}
#' Copy Prophet object.
#'
#' @param m Prophet model object.
#' @param cutoff Date, possibly as string. Changepoints are only retained if
#' changepoints <= cutoff.
#'
#' @return An unfitted Prophet model object with the same parameters as the
#' input model.
#'
#' @keywords internal
prophet_copy <- function(m, cutoff = NULL) {
if (m$specified.changepoints) {
changepoints <- m$changepoints
if (!is.null(cutoff)) {
cutoff <- set_date(cutoff)
changepoints <- changepoints[changepoints <= cutoff]
}
} else {
changepoints <- NULL
}
return(prophet(
growth = m$growth,
changepoints = changepoints,
n.changepoints = m$n.changepoints,
yearly.seasonality = m$yearly.seasonality,
weekly.seasonality = m$weekly.seasonality,
daily.seasonality = m$daily.seasonality,
holidays = m$holidays,
seasonality.prior.scale = m$seasonality.prior.scale,
changepoint.prior.scale = m$changepoint.prior.scale,
holidays.prior.scale = m$holidays.prior.scale,
mcmc.samples = m$mcmc.samples,
interval.width = m$interval.width,
uncertainty.samples = m$uncertainty.samples,
fit = FALSE,
))
}
# fb-block 3