Implement metrics for time series forecasts (#384)

* Add metrics for model validation

* Update to reflect comments on Github

* Change all_metrics() implementation to more sophisticated style

* Update metrics functions

* Remove redundant argument

* Use v0.3 files for diagnostics.R and prophet.R

* Use v0.3 file for man/predict.prophet.Rd

* Change the interface and do some refactoring
This commit is contained in:
Nagi Teramo 2018-01-06 09:05:58 +09:00 committed by Ben Letham
parent 014b3b5919
commit 14d5028a73
6 changed files with 321 additions and 1 deletions

View file

@ -25,6 +25,8 @@ Imports:
stats,
tidyr (>= 0.6.1),
utils,
purrr,
rlang,
xts
Suggests:
knitr,

View file

@ -1,18 +1,25 @@
# Generated by roxygen2: do not edit by hand
S3method(plot,prophet)
S3method(predict,prophet)
export(add_regressor)
export(add_seasonality)
export(all_metrics)
export(cross_validation)
export(dyplot.prophet)
export(fit.prophet)
export(layer_changepoints)
export(mae)
export(make_future_dataframe)
export(mape)
export(me)
export(mpe)
export(mse)
export(plot.prophet)
export(plot_forecast_component)
export(predictive_samples)
export(prophet)
export(prophet_plot_components)
export(rmse)
export(simulated_historical_forecasts)
import(Rcpp)
importFrom(dplyr,"%>%")

150
R/R/metrics.R Normal file
View file

@ -0,0 +1,150 @@
## Copyright (c) 2017-present, Facebook, Inc.
## All rights reserved.
## This source code is licensed under the BSD-style license found in the
## LICENSE file in the root directory of this source tree. An additional grant
## of patent rights can be found in the PATENTS file in the same directory.
#' @title Metrics for Time Series Forecasts
#'
#' @description
#' A time-series forecast requires making a quantitative prediction of future values.
#' After forecasting, we also have to report the accuracy of the forecasts to check whether they serve our needs.
#' Metrics for time series forecasts are useful for telling you how good your model is and for helping you determine which forecasting models work best.
#'
#' @details
#' Here, as a notation, we assume that \eqn{y} is the actual value and \eqn{yhat} is the forecast value.
#'
#' Mean Error (ME, \code{me})
#'
#' The Mean Error (ME) is defined by the formula:
#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t}) .}
#'
#' Mean Squared Error (MSE, \code{mse})
#'
#' The Mean Squared Error (MSE) is defined by the formula:
#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2 .}
#'
#' Root Mean Square Error (RMSE, \code{rmse})
#'
#' The Root Mean Square Error (RMSE) is defined by the formula:
#' \deqn{ \sqrt{\frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2} .}
#'
#' Mean Absolute Error (MAE, \code{mae})
#'
#' The Mean Absolute Error (MAE) is defined by the formula:
#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} | y_{t}-yhat_{t} | .}
#'
#' Mean Percentage Error (MPE, \code{mpe})
#'
#' The Mean Percentage Error (MPE) is usually expressed as a percentage
#' and is defined by the formula:
#' \deqn{ \frac{100}{n} \sum_{t=1}^{n} \frac {y_{t}-yhat_{t}}{y_{t}} .}
#'
#' Mean Absolute Percentage Error (MAPE, \code{mape})
#'
#' The Mean absolute Percentage Error (MAPE), also known as Mean Absolute Percentage Deviation (MAPD), is usually expressed as a percentage,
#' and is defined by the formula:
#' \deqn{ \frac{100}{n} \sum_{t=1}^{n} | \frac {y_{t}-yhat_{t}}{y_{t}}| .}
#'
#' @param m Prophet object. Default NULL
#' @param df A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL
#'
#' @return metrics value (numeric)
#'
#'@examples
#'\dontrun{
#' # Create example model
#' library(readr)
#' library(prophet)
#' df <- read_csv('../tests/testthat/data.csv')
#' m <- prophet(df)
#' future <- make_future_dataframe(m, periods = 365)
#' forecast <- predict(m, future)
#' all_metrics(m)
#' df.cv <- cross_validation(m, horizon = 100, units = 'days')
#' all_metrics(df = df.cv)
#' # You can check your model's accuracy using me, mse, rmse, etc.
#' print(rmse(m))
#'}
#' @name metrics
NULL
#' Prepare dataframe for metrics calculation.
#'
#' Resolves the data to score. When `df` is supplied it is used directly;
#' otherwise `m$history` is inner-joined on `ds` with the result of
#' `predict(m, NULL)`. Only the `y` and `yhat` columns are kept and rows with a
#' missing value in either of them are dropped.
#'
#' @param m Prophet object. Default NULL
#' @param df A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL
#'
#' @return A dataframe only with y and yhat as a column.
#'
#' @keywords internal
create_metric_data <- function(m=NULL, df=NULL)
{
  if(is.null(m) && is.null(df))
  {
    stop("You have to specify one of `m` and `df` at least.")
  }
  if(!is.null(m) && !is.null(df))
  {
    warning("You specify both of `m` and `df`. `df` is used for metrics calclation.")
  }
  data <- if(!is.null(df)){
    df
  } else if(inherits(m, "prophet")) {
    dplyr::inner_join(m$history, predict(m, NULL), by="ds")
  } else {
    # Without this branch `data` would silently become NULL and the failure
    # would surface later as an unrelated column-selection error.
    stop("`m` must be a prophet object.")
  }
  if(!is.data.frame(data))
  {
    stop("`df` must be a dataframe.")
  }
  missing_cols <- setdiff(c("y", "yhat"), names(data))
  if(length(missing_cols) > 0)
  {
    stop("Missing required column(s): ", paste(missing_cols, collapse=", "))
  }
  # Select by name (no non-standard evaluation) so that no undefined global
  # symbols are referenced, then drop incomplete y/yhat pairs.
  stats::na.omit(data[, c("y", "yhat"), drop=FALSE])
}
#' Factory that wraps a raw metric into a user-facing metric function.
#'
#' The returned function accepts `m` and/or `df`, obtains the `y`/`yhat`
#' columns through `create_metric_data`, and applies `metrics` to them.
#'
#' @param metrics metrics function taking the actual values and the forecast
#'  values, in that order
#'
#' @return A function using for metrics evaluation.
#'
#' @keywords internal
make_metrics_function <- function(metrics)
{
  evaluate <- function(m=NULL, df=NULL)
  {
    pairs <- create_metric_data(m, df)
    metrics(pairs[["y"]], pairs[["yhat"]])
  }
  evaluate
}
#' @rdname metrics
#' @export
me <- make_metrics_function(function(y, yhat) {
  # Mean Error: signed average of the residuals.
  residual <- y - yhat
  mean(residual)
})
#' @rdname metrics
#' @export
mse <- make_metrics_function(function(y, yhat) {
  # Mean Squared Error.
  squared <- (y - yhat)^2
  mean(squared)
})
#' @rdname metrics
#' @export
rmse <- make_metrics_function(function(y, yhat) {
  # Root Mean Square Error: square root of the mean squared residual.
  squared <- (y - yhat)^2
  sqrt(mean(squared))
})
#' @rdname metrics
#' @export
mae <- make_metrics_function(function(y, yhat) {
  # Mean Absolute Error.
  magnitude <- abs(y - yhat)
  mean(magnitude)
})
#' @rdname metrics
#' @export
mpe <- make_metrics_function(function(y, yhat) {
  # Mean Percentage Error: residual relative to the actual value, in percent.
  relative <- (y - yhat)/y
  100*mean(relative)
})
#' @rdname metrics
#' @export
mape <- make_metrics_function(function(y, yhat) {
  # Mean Absolute Percentage Error, in percent.
  relative <- abs((y - yhat)/y)
  100*mean(relative)
})
#' @rdname metrics
#' @export
all_metrics <- function(m=NULL, df=NULL)
{
  # Resolve the y/yhat pairs once. Letting every metric do this on its own
  # would repeat the model prediction and any argument warning once per metric.
  data <- create_metric_data(m, df)
  # Reference the metric functions directly rather than looking them up by
  # name, so the result does not depend on what is visible on the search path.
  metrics <- list(me=me, mse=mse, rmse=rmse, mae=mae, mpe=mpe, mape=mape)
  values <- lapply(metrics, function(metric) metric(df=data))
  # One row, one column per metric, named after the metric.
  dplyr::bind_rows(values)
}

View file

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metrics.R
\name{create_metric_data}
\alias{create_metric_data}
\title{Prepare dataframe for metrics calculation.}
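\usage{
create_metric_data(m = NULL, df = NULL)
}
\arguments{
\item{m}{Prophet object. Default NULL}
\item{df}{A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL}
}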
\value{
A dataframe only with y and yhat as a column.
}
\description{
Prepare dataframe for metrics calculation.
}
\keyword{internal}

83
R/man/metrics.Rd Normal file
View file

@ -0,0 +1,83 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metrics.R
\name{metrics}
\alias{metrics}
\alias{me}
\alias{mse}
\alias{rmse}
\alias{mae}
\alias{mpe}
\alias{mape}
\alias{all_metrics}
\title{Metrics for Time Series Forecasts}
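\usage{
me(m = NULL, df = NULL)
mse(m = NULL, df = NULL)
rmse(m = NULL, df = NULL)
mae(m = NULL, df = NULL)
mpe(m = NULL, df = NULL)
mape(m = NULL, df = NULL)
all_metrics(m = NULL, df = NULL)
}
\arguments{
\item{m}{Prophet object. Default NULL}
\item{df}{A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL}
}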
\value{
metrics value (numeric)
}
\description{
A time-series forecast requires making a quantitative prediction of future values.
After forecasting, we also have to report the accuracy of the forecasts to check whether they serve our needs.
Metrics for time series forecasts are useful for telling you how good your model is and for helping you determine which forecasting models work best.
}
\details{
Here, as a notation, we assume that \eqn{y} is the actual value and \eqn{yhat} is the forecast value.
Mean Error (ME, \code{me})
The Mean Error (ME) is defined by the formula:
\deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t}) .}
Mean Squared Error (MSE, \code{mse})
The Mean Squared Error (MSE) is defined by the formula:
\deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2 .}
Root Mean Square Error (RMSE, \code{rmse})
The Root Mean Square Error (RMSE) is defined by the formula:
\deqn{ \sqrt{\frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2} .}
Mean Absolute Error (MAE, \code{mae})
The Mean Absolute Error (MAE) is defined by the formula:
\deqn{ \frac{1}{n} \sum_{t=1}^{n} | y_{t}-yhat_{t} | .}
Mean Percentage Error (MPE, \code{mpe})
The Mean Percentage Error (MPE) is usually expressed as a percentage
and is defined by the formula:
\deqn{ \frac{100}{n} \sum_{t=1}^{n} \frac {y_{t}-yhat_{t}}{y_{t}} .}
Mean Absolute Percentage Error (MAPE, \code{mape})
The Mean absolute Percentage Error (MAPE), also known as Mean Absolute Percentage Deviation (MAPD), is usually expressed as a percentage,
and is defined by the formula:
\deqn{ \frac{100}{n} \sum_{t=1}^{n} | \frac {y_{t}-yhat_{t}}{y_{t}}| .}
}
\examples{
\dontrun{
# Create example model
library(readr)
df <- read_csv('../tests/testthat/data.csv')
m <- prophet(df)
# You can check your model's accuracy using me, mse, rmse, etc.
print(rmse(m))
}
}

View file

@ -0,0 +1,60 @@
library(prophet)
context("Prophet metrics tests")

## Makes R CMD CHECK happy due to dplyr syntax below
globalVariables(c("y", "yhat"))

# Shared fixture for the tests in this file: the first 100 rows of data.csv,
# with the `ds` column converted to Date.
DATA <- head(read.csv('data.csv'), n = 100)
DATA[["ds"]] <- as.Date(DATA[["ds"]])
test_that("metrics_tests_using_model", {
  # Create dummy model
  m <- prophet(DATA)
  # Rebuild the metric data independently of the package helper. Restrict to
  # y/yhat before dropping NAs so that a missing value in an unrelated column
  # cannot remove rows that the metric functions themselves keep.
  forecast <- predict(m, NULL)
  joined <- dplyr::inner_join(m$history, forecast, by="ds")
  df <- na.omit(joined[, c("y", "yhat"), drop = FALSE])
  # Check whether every metric is equal to its definition
  y <- df$y
  yhat <- df$yhat
  expect_equal(me(m), mean(y-yhat))
  expect_equal(mse(m), mean((y-yhat)^2))
  expect_equal(rmse(m), sqrt(mean((y-yhat)^2)))
  expect_equal(mae(m), mean(abs(y-yhat)))
  expect_equal(mpe(m), 100*mean((y-yhat)/y))
  expect_equal(mape(m), 100*mean(abs((y-yhat)/y)))
  # all_metrics() must agree with the individual metric functions
  answer <- data.frame(
    me=me(m),
    mse=mse(m),
    rmse=rmse(m),
    mae=mae(m),
    mpe=mpe(m),
    mape=mape(m)
  )
  expect_equal(all_metrics(m), answer)
})
test_that("metrics_tests_using_simulated_historical_forecast", {
  #skip_if_not(Sys.getenv('R_ARCH') != '/i386')
  # Fit a throwaway model on the shared fixture
  model <- prophet(DATA)
  # Score the output of a simulated historical forecast via the `df` argument
  shf <- simulated_historical_forecasts(model, horizon = 3, units = 'days', k = 2, period = 3)
  actual <- shf$y
  predicted <- shf$yhat
  # Each metric must match its textbook definition
  expect_equal(me(df=shf), mean(actual-predicted))
  expect_equal(mse(df=shf), mean((actual-predicted)^2))
  expect_equal(rmse(df=shf), sqrt(mean((actual-predicted)^2)))
  expect_equal(mae(df=shf), mean(abs(actual-predicted)))
  expect_equal(mpe(df=shf), 100*mean((actual-predicted)/actual))
  expect_equal(mape(df=shf), 100*mean(abs((actual-predicted)/actual)))
  # all_metrics() must agree with the individual metric functions
  expected <- data.frame(
    me=me(df=shf),
    mse=mse(df=shf),
    rmse=rmse(df=shf),
    mae=mae(df=shf),
    mpe=mpe(df=shf),
    mape=mape(df=shf)
  )
  expect_equal(all_metrics(df=shf), expected)
})