From b999dd4aa4ff91b2f6501979c192743a7fc5e9b9 Mon Sep 17 00:00:00 2001 From: Ben Letham Date: Thu, 23 Mar 2017 17:27:44 +0200 Subject: [PATCH] Add docstring to Python methods, and minor fixes to R documentation. --- R/R/prophet.R | 74 ++++--- R/man/fourier_series.Rd | 4 +- R/man/linear_growth_init.Rd | 6 +- R/man/logistic_growth_init.Rd | 2 +- R/man/make_all_seasonality_features.Rd | 4 +- R/man/make_holiday_features.Rd | 2 +- R/man/make_seasonality_features.Rd | 2 +- R/man/predict.prophet.Rd | 9 +- R/man/predict_seasonal_components.Rd | 9 +- R/man/predict_trend.Rd | 5 +- R/man/predict_uncertainty.Rd | 5 +- R/man/prophet.Rd | 20 +- R/man/sample_model.Rd | 4 +- R/man/sample_predictive_trend.Rd | 4 +- R/man/set_changepoints.Rd | 7 +- R/man/setup_dataframe.Rd | 4 +- python/fbprophet/forecaster.py | 270 ++++++++++++++++++++++--- 17 files changed, 342 insertions(+), 89 deletions(-) diff --git a/R/R/prophet.R b/R/R/prophet.R index 4414b8a..bf4b8f1 100644 --- a/R/R/prophet.R +++ b/R/R/prophet.R @@ -15,14 +15,14 @@ globalVariables(c( #' Prophet forecaster. #' -#' @param df Data frame with columns ds (date type) and y, the time series. -#' If growth is logistic, then df must also have a column cap that specifies -#' the capacity at each ds. +#' @param df Dataframe containing the history. Must have columns ds (date type) +#' and y, the time series. If growth is logistic, then df must also have a +#' column cap that specifies the capacity at each ds. #' @param growth String 'linear' or 'logistic' to specify a linear or logistic #' trend. #' @param changepoints Vector of dates at which to include potential -#' changepoints. Each date must be present in df$ds. If not specified, -#' potential changepoints are selected automatically. +#' changepoints. If not specified, potential changepoints are selected +#' automatically. #' @param n.changepoints Number of potential changepoints to include. Not used #' if input `changepoints` is supplied. 
If `changepoints` is not supplied, #' then n.changepoints potential changepoints are selected uniformly from the @@ -36,11 +36,11 @@ globalVariables(c( #' @param seasonality.prior.scale Parameter modulating the strength of the #' seasonality model. Larger values allow the model to fit larger seasonal #' fluctuations, smaller values dampen the seasonality. +#' @param holidays.prior.scale Parameter modulating the strength of the holiday +#' components model. #' @param changepoint.prior.scale Parameter modulating the flexibility of the #' automatic changepoint selection. Large values will allow many changepoints, #' small values will allow few changepoints. -#' @param holidays.prior.scale Parameter modulating the strength of the holiday -#' components model. #' @param mcmc.samples Integer, if great than 0, will do full Bayesian #' inference with the specified number of MCMC samples. If 0, will do MAP #' estimation. @@ -74,8 +74,8 @@ prophet <- function(df = df, weekly.seasonality = TRUE, holidays = NULL, seasonality.prior.scale = 10, - changepoint.prior.scale = 0.05, holidays.prior.scale = 10, + changepoint.prior.scale = 0.05, mcmc.samples = 0, interval.width = 0.80, uncertainty.samples = 1000, @@ -203,7 +203,9 @@ compile_stan_model <- function(model) { #' Prepare dataframe for fitting or predicting. #' -#' Adds a time index and scales y. +#' Adds a time index and scales y. Creates auxillary columns 't', 't_ix', +#' 'y_scaled', and 'cap_scaled'. These columns are used during both fitting +#' and predicting. #' #' @param m Prophet object. #' @param df Data frame with columns ds, y, and cap if logistic growth. @@ -243,7 +245,12 @@ setup_dataframe <- function(m, df, initialize_scales = FALSE) { #' Set changepoints #' -#' Sets m$changepoints to the dates of changepoints. +#' Sets m$changepoints to the dates of changepoints. Either: +#' 1) The changepoints were passed in explicitly. +#' A) They are empty. +#' B) They are not empty, and need validation. 
+#' 2) We are generating a grid of them. +#' 3) The user prefers no changepoints be used. #' #' @param m Prophet object. #' @@ -292,7 +299,7 @@ get_changepoint_matrix <- function(m) { return(A) } -#' Provides fourier series components with the specified frequency. +#' Provides Fourier series components with the specified frequency and order. #' #' @param dates Vector of dates. #' @param period Number of days of the period. @@ -316,7 +323,7 @@ fourier_series <- function(dates, period, series.order) { #' @param dates Vector of dates. #' @param period Number of days of the period. #' @param series.order Number of components. -#' @param prefix Column name prefix +#' @param prefix Column name prefix. #' #' @return Dataframe with seasonality. #' @@ -331,7 +338,7 @@ make_seasonality_features <- function(dates, period, series.order, prefix) { #' @param m Prophet object. #' @param dates Vector with dates used for computing seasonality. #' -#' @return A dataframe with a column for each holiday +#' @return A dataframe with a column for each holiday. #' #' @importFrom dplyr "%>%" make_holiday_features <- function(m, dates) { @@ -362,7 +369,7 @@ make_holiday_features <- function(m, dates) { return(holiday.mat) } -#' Data frame seasonality features. +#' Dataframe with seasonality features. #' #' @param m Prophet object. #' @param df Dataframe with dates for computing seasonality features. @@ -391,14 +398,14 @@ make_all_seasonality_features <- function(m, df) { return(seasonal.features) } -#' Initialize linear growth +#' Initialize linear growth. #' #' Provides a strong initialization for linear growth by calculating the #' growth and offset parameters that pass the function through the first and #' last points in the time series. #' -#' @param df Data frame with columns ds (date), cap_scaled (scaled capacity), -#' y_scaled (scaled time series), and t (scaled time). +#' @param df Data frame with columns ds (date), y_scaled (scaled time series), +#' and t (scaled time). 
#' #' @return A vector (k, m) with the rate (k) and offset (m) of the linear #' growth function. @@ -414,7 +421,7 @@ linear_growth_init <- function(df) { return(c(k, m)) } -#' Initialize logistic growth +#' Initialize logistic growth. #' #' Provides a strong initialization for logistic growth by calculating the #' growth and offset parameters that pass the function through the first and @@ -543,11 +550,12 @@ fit.prophet <- function(m, df, ...) { #' Predict using the prophet model. #' #' @param object Prophet object. -#' @param df Dataframe with dates for predictions, and capacity if logistic -#' growth. If not provided, predictions are made on the history. -#' @param ... additional arguments +#' @param df Dataframe with dates for predictions (column ds), and capacity +#' (column cap) if logistic growth. If not provided, predictions are made on +#' the history. +#' @param ... additional arguments. #' -#' @return A data_frame with a forecast +#' @return A dataframe with the forecast components. #' #' @examples #' \dontrun{ @@ -636,7 +644,9 @@ piecewise_logistic <- function(t, cap, deltas, k, m, changepoint.ts) { #' Predict trend using the prophet model. #' #' @param model Prophet object. -#' @param df Data frame. +#' @param df Prediction dataframe. +#' +#' @return Vector with trend on prediction dates. #' predict_trend <- function(model, df) { k <- mean(model$params$k, na.rm = TRUE) @@ -654,10 +664,12 @@ predict_trend <- function(model, df) { return(trend * model$y.scale) } -#' Seasonality broken down into components +#' Predict seasonality broken down into components. #' #' @param m Prophet object. -#' @param df Data frame. +#' @param df Prediction dataframe. +#' +#' @return Dataframe with seasonal components. #' predict_seasonal_components <- function(m, df) { seasonal.features <- make_all_seasonality_features(m, df) @@ -700,7 +712,9 @@ predict_seasonal_components <- function(m, df) { #' Prophet uncertainty intervals. #' #' @param m Prophet object. 
-#' @param df Data frame. +#' @param df Prediction dataframe. +#' +#' @return Dataframe with uncertainty intervals. #' predict_uncertainty <- function(m, df) { # Sample trend, seasonality, and yhat from the extrapolation model. @@ -746,9 +760,9 @@ predict_uncertainty <- function(m, df) { #' Simulate observations from the extrapolated generative model. #' #' @param m Prophet object. -#' @param df Dataframe that was fit by Prophet. +#' @param df Prediction dataframe. #' @param seasonal.features Data frame of seasonal features -#' @param iteration Int sampling iteration ot use parameters from. +#' @param iteration Int sampling iteration to use parameters from. #' #' @return List of trend, seasonality, and yhat, each a vector like df$t. #' @@ -769,8 +783,8 @@ sample_model <- function(m, df, seasonal.features, iteration) { #' Simulate the trend using the extrapolated generative model. #' #' @param model Prophet object. -#' @param df Dataframe that was fit by Prophet. -#' @param iteration Int sampling iteration ot use parameters from. +#' @param df Prediction dataframe. +#' @param iteration Int sampling iteration to use parameters from. #' #' @return Vector of simulated trend over df$t. #' diff --git a/R/man/fourier_series.Rd b/R/man/fourier_series.Rd index 3fe17eb..a3cf3d6 100644 --- a/R/man/fourier_series.Rd +++ b/R/man/fourier_series.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/prophet.R \name{fourier_series} \alias{fourier_series} -\title{Provides fourier series components with the specified frequency.} +\title{Provides Fourier series components with the specified frequency and order.} \usage{ fourier_series(dates, period, series.order) } @@ -17,6 +17,6 @@ fourier_series(dates, period, series.order) Matrix with seasonality features. } \description{ -Provides fourier series components with the specified frequency. +Provides Fourier series components with the specified frequency and order. 
} diff --git a/R/man/linear_growth_init.Rd b/R/man/linear_growth_init.Rd index 48915fb..f4c6c17 100644 --- a/R/man/linear_growth_init.Rd +++ b/R/man/linear_growth_init.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/prophet.R \name{linear_growth_init} \alias{linear_growth_init} -\title{Initialize linear growth} +\title{Initialize linear growth.} \usage{ linear_growth_init(df) } \arguments{ -\item{df}{Data frame with columns ds (date), cap_scaled (scaled capacity), -y_scaled (scaled time series), and t (scaled time).} +\item{df}{Data frame with columns ds (date), y_scaled (scaled time series), +and t (scaled time).} } \value{ A vector (k, m) with the rate (k) and offset (m) of the linear diff --git a/R/man/logistic_growth_init.Rd b/R/man/logistic_growth_init.Rd index adeb5a8..863bd11 100644 --- a/R/man/logistic_growth_init.Rd +++ b/R/man/logistic_growth_init.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/prophet.R \name{logistic_growth_init} \alias{logistic_growth_init} -\title{Initialize logistic growth} +\title{Initialize logistic growth.} \usage{ logistic_growth_init(df) } diff --git a/R/man/make_all_seasonality_features.Rd b/R/man/make_all_seasonality_features.Rd index dd2815b..21c14ac 100644 --- a/R/man/make_all_seasonality_features.Rd +++ b/R/man/make_all_seasonality_features.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/prophet.R \name{make_all_seasonality_features} \alias{make_all_seasonality_features} -\title{Data frame seasonality features.} +\title{Dataframe with seasonality features.} \usage{ make_all_seasonality_features(m, df) } @@ -15,6 +15,6 @@ make_all_seasonality_features(m, df) Dataframe with seasonality. } \description{ -Data frame seasonality features. +Dataframe with seasonality features. 
} diff --git a/R/man/make_holiday_features.Rd b/R/man/make_holiday_features.Rd index 441bd8a..dd6f73a 100644 --- a/R/man/make_holiday_features.Rd +++ b/R/man/make_holiday_features.Rd @@ -12,7 +12,7 @@ make_holiday_features(m, dates) \item{dates}{Vector with dates used for computing seasonality.} } \value{ -A dataframe with a column for each holiday +A dataframe with a column for each holiday. } \description{ Construct a matrix of holiday features. diff --git a/R/man/make_seasonality_features.Rd b/R/man/make_seasonality_features.Rd index 9577620..e76980b 100644 --- a/R/man/make_seasonality_features.Rd +++ b/R/man/make_seasonality_features.Rd @@ -13,7 +13,7 @@ make_seasonality_features(dates, period, series.order, prefix) \item{series.order}{Number of components.} -\item{prefix}{Column name prefix} +\item{prefix}{Column name prefix.} } \value{ Dataframe with seasonality. diff --git a/R/man/predict.prophet.Rd b/R/man/predict.prophet.Rd index 77c4f4b..ba69efc 100644 --- a/R/man/predict.prophet.Rd +++ b/R/man/predict.prophet.Rd @@ -9,13 +9,14 @@ \arguments{ \item{object}{Prophet object.} -\item{df}{Dataframe with dates for predictions, and capacity if logistic -growth. If not provided, predictions are made on the history.} +\item{df}{Dataframe with dates for predictions (column ds), and capacity +(column cap) if logistic growth. If not provided, predictions are made on +the history.} -\item{...}{additional arguments} +\item{...}{additional arguments.} } \value{ -A data_frame with a forecast +A dataframe with the forecast components. } \description{ Predict using the prophet model. 
diff --git a/R/man/predict_seasonal_components.Rd b/R/man/predict_seasonal_components.Rd index 5c3ff30..30cf3f2 100644 --- a/R/man/predict_seasonal_components.Rd +++ b/R/man/predict_seasonal_components.Rd @@ -2,16 +2,19 @@ % Please edit documentation in R/prophet.R \name{predict_seasonal_components} \alias{predict_seasonal_components} -\title{Seasonality broken down into components} +\title{Predict seasonality broken down into components.} \usage{ predict_seasonal_components(m, df) } \arguments{ \item{m}{Prophet object.} -\item{df}{Data frame.} +\item{df}{Prediction dataframe.} +} +\value{ +Dataframe with seasonal components. } \description{ -Seasonality broken down into components +Predict seasonality broken down into components. } diff --git a/R/man/predict_trend.Rd b/R/man/predict_trend.Rd index 4556297..98eff57 100644 --- a/R/man/predict_trend.Rd +++ b/R/man/predict_trend.Rd @@ -9,7 +9,10 @@ predict_trend(model, df) \arguments{ \item{model}{Prophet object.} -\item{df}{Data frame.} +\item{df}{Prediction dataframe.} +} +\value{ +Vector with trend on prediction dates. } \description{ Predict trend using the prophet model. diff --git a/R/man/predict_uncertainty.Rd b/R/man/predict_uncertainty.Rd index 3ee5d29..6f163a7 100644 --- a/R/man/predict_uncertainty.Rd +++ b/R/man/predict_uncertainty.Rd @@ -9,7 +9,10 @@ predict_uncertainty(m, df) \arguments{ \item{m}{Prophet object.} -\item{df}{Data frame.} +\item{df}{Prediction dataframe.} +} +\value{ +Dataframe with uncertainty intervals. } \description{ Prophet uncertainty intervals. 
diff --git a/R/man/prophet.Rd b/R/man/prophet.Rd index 5513e28..8678bc3 100644 --- a/R/man/prophet.Rd +++ b/R/man/prophet.Rd @@ -7,21 +7,21 @@ prophet(df = df, growth = "linear", changepoints = NULL, n.changepoints = 25, yearly.seasonality = TRUE, weekly.seasonality = TRUE, holidays = NULL, - seasonality.prior.scale = 10, changepoint.prior.scale = 0.05, - holidays.prior.scale = 10, mcmc.samples = 0, interval.width = 0.8, + seasonality.prior.scale = 10, holidays.prior.scale = 10, + changepoint.prior.scale = 0.05, mcmc.samples = 0, interval.width = 0.8, uncertainty.samples = 1000, fit = TRUE, ...) } \arguments{ -\item{df}{Data frame with columns ds (date type) and y, the time series. -If growth is logistic, then df must also have a column cap that specifies -the capacity at each ds.} +\item{df}{Dataframe containing the history. Must have columns ds (date type) +and y, the time series. If growth is logistic, then df must also have a +column cap that specifies the capacity at each ds.} \item{growth}{String 'linear' or 'logistic' to specify a linear or logistic trend.} \item{changepoints}{Vector of dates at which to include potential -changepoints. Each date must be present in df$ds. If not specified, -potential changepoints are selected automatically.} +changepoints. If not specified, potential changepoints are selected +automatically.} \item{n.changepoints}{Number of potential changepoints to include. Not used if input `changepoints` is supplied. If `changepoints` is not supplied, @@ -41,13 +41,13 @@ will include 2 days prior to the date as holidays.} seasonality model. Larger values allow the model to fit larger seasonal fluctuations, smaller values dampen the seasonality.} +\item{holidays.prior.scale}{Parameter modulating the strength of the holiday +components model.} + \item{changepoint.prior.scale}{Parameter modulating the flexibility of the automatic changepoint selection. 
Large values will allow many changepoints, small values will allow few changepoints.} -\item{holidays.prior.scale}{Parameter modulating the strength of the holiday -components model.} - \item{mcmc.samples}{Integer, if great than 0, will do full Bayesian inference with the specified number of MCMC samples. If 0, will do MAP estimation.} diff --git a/R/man/sample_model.Rd b/R/man/sample_model.Rd index d0aa07c..74bfc6c 100644 --- a/R/man/sample_model.Rd +++ b/R/man/sample_model.Rd @@ -9,11 +9,11 @@ sample_model(m, df, seasonal.features, iteration) \arguments{ \item{m}{Prophet object.} -\item{df}{Dataframe that was fit by Prophet.} +\item{df}{Prediction dataframe.} \item{seasonal.features}{Data frame of seasonal features} -\item{iteration}{Int sampling iteration ot use parameters from.} +\item{iteration}{Int sampling iteration to use parameters from.} } \value{ List of trend, seasonality, and yhat, each a vector like df$t. diff --git a/R/man/sample_predictive_trend.Rd b/R/man/sample_predictive_trend.Rd index 14ed164..522f551 100644 --- a/R/man/sample_predictive_trend.Rd +++ b/R/man/sample_predictive_trend.Rd @@ -9,9 +9,9 @@ sample_predictive_trend(model, df, iteration) \arguments{ \item{model}{Prophet object.} -\item{df}{Dataframe that was fit by Prophet.} +\item{df}{Prediction dataframe.} -\item{iteration}{Int sampling iteration ot use parameters from.} +\item{iteration}{Int sampling iteration to use parameters from.} } \value{ Vector of simulated trend over df$t. diff --git a/R/man/set_changepoints.Rd b/R/man/set_changepoints.Rd index 369a0c0..4235508 100644 --- a/R/man/set_changepoints.Rd +++ b/R/man/set_changepoints.Rd @@ -13,6 +13,11 @@ set_changepoints(m) m with changepoints set. } \description{ -Sets m$changepoints to the dates of changepoints. +Sets m$changepoints to the dates of changepoints. Either: +1) The changepoints were passed in explicitly. + A) They are empty. + B) They are not empty, and need validation. +2) We are generating a grid of them. 
+3) The user prefers no changepoints be used. } diff --git a/R/man/setup_dataframe.Rd b/R/man/setup_dataframe.Rd index d52aa2e..87678c4 100644 --- a/R/man/setup_dataframe.Rd +++ b/R/man/setup_dataframe.Rd @@ -17,6 +17,8 @@ setup_dataframe(m, df, initialize_scales = FALSE) list with items 'df' and 'm'. } \description{ -Adds a time index and scales y. +Adds a time index and scales y. Creates auxillary columns 't', 't_ix', +'y_scaled', and 'cap_scaled'. These columns are used during both fitting +and predicting. } diff --git a/python/fbprophet/forecaster.py b/python/fbprophet/forecaster.py index 81eb94a..0ea5055 100644 --- a/python/fbprophet/forecaster.py +++ b/python/fbprophet/forecaster.py @@ -34,6 +34,44 @@ except ImportError: # fb-block 2 class Prophet(object): + """Prophet forecaster. + + Parameters + ---------- + growth: String 'linear' or 'logistic' to specify a linear or logistic + trend. + changepoints: List of dates at which to include potential changepoints. If + not specified, potential changepoints are selected automatically. + n_changepoints: Number of potential changepoints to include. Not used + if input `changepoints` is supplied. If `changepoints` is not supplied, + then n_changepoints potential changepoints are selected uniformly from + the first 80 percent of the history. + yearly_seasonality: Boolean, fit yearly seasonality. + weekly_seasonality: Boolean, fit weekly seasonality. + holidays: pd.DataFrame with columns holiday (string) and ds (date type) + and optionally columns lower_window and upper_window which specify a + range of days around the date to be included as holidays. + lower_window=-2 will include 2 days prior to the date as holidays. + seasonality_prior_scale: Parameter modulating the strength of the + seasonality model. Larger values allow the model to fit larger seasonal + fluctuations, smaller values dampen the seasonality. + holidays_prior_scale: Parameter modulating the strength of the holiday + components model.
+ changepoint_prior_scale: Parameter modulating the flexibility of the + automatic changepoint selection. Large values will allow many + changepoints, small values will allow few changepoints. + mcmc_samples: Integer, if greater than 0, will do full Bayesian inference + with the specified number of MCMC samples. If 0, will do MAP + estimation. + interval_width: Float, width of the uncertainty intervals provided + for the forecast. If mcmc_samples=0, this will be only the uncertainty + in the trend using the MAP estimate of the extrapolated generative + model. If mcmc_samples>0, this will be integrated over all model + parameters, which will include uncertainty in seasonality. + uncertainty_samples: Number of simulated draws used to estimate + uncertainty intervals. + """ + def __init__( self, growth='linear', @@ -91,6 +129,7 @@ class Prophet(object): self.validate_inputs() def validate_inputs(self): + """Validates the inputs to Prophet.""" if self.growth not in ('linear', 'logistic'): raise ValueError( "Parameter 'growth' should be 'linear' or 'logistic'.") @@ -114,6 +153,7 @@ class Prophet(object): @classmethod def get_linear_model(cls): + """Load compiled linear trend Stan model""" # fb-block 3 # fb-block 4 start model_file = pkg_resources.resource_filename( @@ -126,6 +166,7 @@ class Prophet(object): @classmethod def get_logistic_model(cls): + """Load compiled logistic trend Stan model""" # fb-block 5 # fb-block 6 start model_file = pkg_resources.resource_filename( @@ -137,9 +178,20 @@ class Prophet(object): return pickle.load(f) def setup_dataframe(self, df, initialize_scales=False): - """Create auxillary columns 't', 't_ix', 'y_scaled', and 'cap_scaled'. + """Prepare dataframe for fitting or predicting. - These columns are used during both fitting and prediction. + Adds a time index and scales y. Creates auxiliary columns 't', 't_ix', + 'y_scaled', and 'cap_scaled'. These columns are used during both + fitting and predicting.
+ + Parameters + ---------- + df: pd.DataFrame with columns ds, y, and cap if logistic growth. + initialize_scales: Boolean set scaling factors in self from df. + + Returns + ------- + pd.DataFrame prepared for fitting or predicting. """ if 'y' in df: df['y'] = pd.to_numeric(df['y']) @@ -164,14 +216,14 @@ class Prophet(object): return df def set_changepoints(self): - """Generate a list of changepoints. + """Set changepoints. - Either: - 1) the changepoints were passed in explicitly - A) they are empty - B) not empty, needs validation - 2) we are generating a grid of them - 3) the user prefers no changepoints to be used + Sets self.changepoints to the dates of changepoints. Either: + 1) The changepoints were passed in explicitly. + A) They are empty. + B) They are not empty, and need validation. + 2) We are generating a grid of them. + 3) The user prefers no changepoints be used. """ if self.changepoints is not None: if len(self.changepoints) == 0: @@ -200,6 +252,7 @@ class Prophet(object): self.changepoints_t = np.array([0]) # dummy changepoint def get_changepoint_matrix(self): + """Gets changepoint matrix for history dataframe.""" A = np.zeros((self.history.shape[0], len(self.changepoints_t))) for i, t_i in enumerate(self.changepoints_t): A[self.history['t'].values >= t_i, i] = 1 @@ -207,17 +260,18 @@ class Prophet(object): @staticmethod def fourier_series(dates, period, series_order): - """Generate a Fourier expansion for a fixed frequency and order. + """Provides Fourier series components with the specified frequency + and order. Parameters ---------- - dates: a pd.Series containing timestamps - period: an integer frequency (number of days) - series_order: number of components to generate + dates: pd.Series containing timestamps. + period: Number of days of the period. + series_order: Number of components. Returns ------- - a 2-dimensional np.array with one row per row in `dt` + Matrix with seasonality features.
""" # convert to days since epoch t = np.array( @@ -233,6 +287,20 @@ class Prophet(object): @classmethod def make_seasonality_features(cls, dates, period, series_order, prefix): + """Data frame with seasonality features. + + Parameters + ---------- + cls: Prophet class. + dates: pd.Series containing timestamps. + period: Number of days of the period. + series_order: Number of components. + prefix: Column name prefix. + + Returns + ------- + pd.DataFrame with seasonality features. + """ features = cls.fourier_series(dates, period, series_order) columns = [ '{}_delim_{}'.format(prefix, i + 1) @@ -241,7 +309,15 @@ class Prophet(object): return pd.DataFrame(features, columns=columns) def make_holiday_features(self, dates): - """Generate a DataFrame with each column corresponding to a holiday. + """Construct a dataframe of holiday features. + + Parameters + ---------- + dates: pd.Series containing timestamps used for computing seasonality. + + Returns + ------- + pd.DataFrame with a column for each holiday. """ # A smaller prior scale will shrink holiday estimates more scale_ratio = self.holidays_prior_scale / self.seasonality_prior_scale @@ -280,6 +356,16 @@ class Prophet(object): return pd.DataFrame(expanded_holidays) def make_all_seasonality_features(self, df): + """Dataframe with seasonality features. + + Parameters + ---------- + df: pd.DataFrame with dates for computing seasonality features. + + Returns + ------- + pd.DataFrame with seasonality. + """ seasonal_features = [ # Add a column of zeros in case no seasonality is used. pd.DataFrame({'zeros': np.zeros(df.shape[0])}) @@ -308,6 +394,22 @@ class Prophet(object): @staticmethod def linear_growth_init(df): + """Initialize linear growth. + + Provides a strong initialization for linear growth by calculating the + growth and offset parameters that pass the function through the first + and last points in the time series. 
+ + Parameters + ---------- + df: pd.DataFrame with columns ds (date), y_scaled (scaled time series), + and t (scaled time). + + Returns + ------- + A tuple (k, m) with the rate (k) and offset (m) of the linear growth + function. + """ i0, i1 = df['ds'].idxmin(), df['ds'].idxmax() T = df['t'].ix[i1] - df['t'].ix[i0] k = (df['y_scaled'].ix[i1] - df['y_scaled'].ix[i0]) / T @@ -316,6 +418,22 @@ class Prophet(object): @staticmethod def logistic_growth_init(df): + """Initialize logistic growth. + + Provides a strong initialization for logistic growth by calculating the + growth and offset parameters that pass the function through the first + and last points in the time series. + + Parameters + ---------- + df: pd.DataFrame with columns ds (date), cap_scaled (scaled capacity), + y_scaled (scaled time series), and t (scaled time). + + Returns + ------- + A tuple (k, m) with the rate (k) and offset (m) of the logistic growth + function. + """ i0, i1 = df['ds'].idxmin(), df['ds'].idxmax() T = df['t'].ix[i1] - df['t'].ix[i0] @@ -337,14 +455,16 @@ class Prophet(object): # fb-block 7 def fit(self, df, **kwargs): - """Fit the Prophet model to data. + """Fit the Prophet model. Parameters ---------- - df: pd.DataFrame containing history. Must have columns 'ds', 'y', and - if logistic growth, 'cap'. - kwargs: Additional arguments passed to Stan's sampling or optimizing - function, as appropriate. + df: pd.DataFrame containing the history. Must have columns ds (date + type) and y, the time series. If self.growth is 'logistic', then + df must also have a column cap that specifies the capacity at + each ds. + kwargs: Additional arguments passed to the optimizing or sampling + functions in Stan. Returns ------- @@ -415,12 +535,17 @@ class Prophet(object): # fb-block 8 def predict(self, df=None): - """Predict historical and future values for y. + """Predict using the prophet model. - Note: you must only pass in future dates here. 
- Historical dates are prepended before predictions are made. + Parameters + ---------- + df: pd.DataFrame with dates for predictions (column ds), and capacity + (column cap) if logistic growth. If not provided, predictions are + made on the history. - `df` can be None, in which case we predict only on history. + Returns + ------- + A pd.DataFrame with the forecast components. """ if df is None: df = self.history.copy() @@ -437,6 +562,20 @@ class Prophet(object): @staticmethod def piecewise_linear(t, deltas, k, m, changepoint_ts): + """Evaluate the piecewise linear function. + + Parameters + ---------- + t: np.array of times on which the function is evaluated. + deltas: np.array of rate changes at each changepoint. + k: Float initial rate. + m: Float initial offset. + changepoint_ts: np.array of changepoint times. + + Returns + ------- + Vector y(t). + """ # Intercept changes gammas = -changepoint_ts * deltas # Get cumulative slope and intercept at each t @@ -450,6 +589,21 @@ class Prophet(object): @staticmethod def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts): + """Evaluate the piecewise logistic function. + + Parameters + ---------- + t: np.array of times on which the function is evaluated. + cap: np.array of capacities at each t. + deltas: np.array of rate changes at each changepoint. + k: Float initial rate. + m: Float initial offset. + changepoint_ts: np.array of changepoint times. + + Returns + ------- + Vector y(t). + """ # Compute offset changes k_cum = np.concatenate((np.atleast_1d(k), np.cumsum(deltas) + k)) gammas = np.zeros(len(changepoint_ts)) @@ -468,6 +622,16 @@ class Prophet(object): return cap / (1 + np.exp(-k_t * (t - m_t))) def predict_trend(self, df): + """Predict trend using the prophet model. + + Parameters + ---------- + df: Prediction dataframe. + + Returns + ------- + Vector with trend on prediction dates. 
+ """ k = np.nanmean(self.params['k']) m = np.nanmean(self.params['m']) deltas = np.nanmean(self.params['delta'], axis=0) @@ -483,6 +647,16 @@ class Prophet(object): return trend * self.y_scale def predict_seasonal_components(self, df): + """Predict seasonality broken down into components. + + Parameters + ---------- + df: Prediction dataframe. + + Returns + ------- + Dataframe with seasonal components. + """ seasonal_features = self.make_all_seasonality_features(df) lower_p = 100 * (1.0 - self.interval_width) / 2 upper_p = 100 * (1.0 + self.interval_width) / 2 @@ -520,6 +694,16 @@ class Prophet(object): return component_predictions def predict_uncertainty(self, df): + """Prophet uncertainty intervals. + + Parameters + ---------- + df: Prediction dataframe. + + Returns + ------- + Dataframe with uncertainty intervals. + """ n_iterations = self.params['k'].shape[0] samp_per_iter = max(1, int(np.ceil( self.uncertainty_samples / float(n_iterations) @@ -549,6 +733,18 @@ class Prophet(object): return pd.DataFrame(series) def sample_model(self, df, seasonal_features, iteration): + """Simulate observations from the extrapolated generative model. + + Parameters + ---------- + df: Prediction dataframe. + seasonal_features: pd.DataFrame of seasonal features. + iteration: Int sampling iteration to use parameters from. + + Returns + ------- + Dataframe with trend, seasonality, and yhat, each like df['t']. + """ trend = self.sample_predictive_trend(df, iteration) beta = self.params['beta'][iteration] @@ -564,6 +760,18 @@ class Prophet(object): }) def sample_predictive_trend(self, df, iteration): + """Simulate the trend using the extrapolated generative model. + + Parameters + ---------- + df: Prediction dataframe. + seasonal_features: pd.DataFrame of seasonal features. + iteration: Int sampling iteration to use parameters from. + + Returns + ------- + np.array of simulated trend over df['t'].
+ """ k = self.params['k'][iteration] m = self.params['m'][iteration] deltas = self.params['delta'][iteration] @@ -610,6 +818,20 @@ class Prophet(object): return trend * self.y_scale def make_future_dataframe(self, periods, freq='D', include_history=True): + """Create a dataframe of future dates for prediction. + + Parameters + ---------- + periods: Int number of periods to forecast forward. + freq: Any valid frequency for pd.date_range, such as 'D' or 'M'. + include_history: Boolean to include the historical dates in the data + frame for predictions. + + Returns + ------- + pd.DataFrame that extends forward from the end of self.history for the + requested number of periods. + """ last_date = self.history_dates.max() dates = pd.date_range( start=last_date,