diff --git a/R/DESCRIPTION b/R/DESCRIPTION index 44c6b73..b99208d 100644 --- a/R/DESCRIPTION +++ b/R/DESCRIPTION @@ -25,6 +25,8 @@ Imports: stats, tidyr (>= 0.6.1), utils, + purrr, + rlang, xts Suggests: knitr, diff --git a/R/NAMESPACE b/R/NAMESPACE index 1e460d9..86ba8ff 100644 --- a/R/NAMESPACE +++ b/R/NAMESPACE @@ -1,18 +1,25 @@ # Generated by roxygen2: do not edit by hand -S3method(plot,prophet) S3method(predict,prophet) export(add_regressor) export(add_seasonality) +export(all_metrics) export(cross_validation) export(dyplot.prophet) export(fit.prophet) export(layer_changepoints) +export(mae) export(make_future_dataframe) +export(mape) +export(me) +export(mpe) +export(mse) +export(plot.prophet) export(plot_forecast_component) export(predictive_samples) export(prophet) export(prophet_plot_components) +export(rmse) export(simulated_historical_forecasts) import(Rcpp) importFrom(dplyr,"%>%") diff --git a/R/R/metrics.R b/R/R/metrics.R new file mode 100644 index 0000000..ce9a5ce --- /dev/null +++ b/R/R/metrics.R @@ -0,0 +1,150 @@ +## Copyright (c) 2017-present, Facebook, Inc. +## All rights reserved. + +## This source code is licensed under the BSD-style license found in the +## LICENSE file in the root directory of this source tree. An additional grant +## of patent rights can be found in the PATENTS file in the same directory. + +#' @title Metrics for Time Series Forecasts +#' +#' @description +#' A time-series forecast requires making a quantitative prediction of future values. +#' After forecasting, we also have to measure the accuracy of the forecasts to check whether they serve our needs. +#' Metrics for time series forecasts tell you how good your model is and help you determine which particular forecasting models work best. +#' +#' @details +#' Here, as a notation, we assume that \eqn{y} is the actual value and \eqn{yhat} is the forecast value. 
+#' +#' Mean Error (ME, \code{me}) +#' +#' The Mean Error (ME) is defined by the formula: +#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t}) .} +#' +#' Mean Squared Error (MSE, \code{mse}) +#' +#' The Mean Squared Error (MSE) is defined by the formula: +#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2 .} +#' +#' Root Mean Square Error (RMSE, \code{rmse}) +#' +#' The Root Mean Square Error (RMSE) is defined by the formula: +#' \deqn{ \sqrt{\frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2} .} +#' +#' Mean Absolute Error (MAE, \code{mae}) +#' +#' The Mean Absolute Error (MAE) is defined by the formula: +#' \deqn{ \frac{1}{n} \sum_{t=1}^{n} | y_{t}-yhat_{t} | .} +#' +#' Mean Percentage Error (MPE, \code{mpe}) +#' +#' The Mean Percentage Error (MPE) is usually expressed as a percentage +#' and is defined by the formula: +#' \deqn{ \frac{100}{n} \sum_{t=1}^{n} \frac {y_{t}-yhat_{t}}{y_{t}} .} +#' +#' Mean Absolute Percentage Error (MAPE, \code{mape}) +#' +#' The Mean Absolute Percentage Error (MAPE), also known as Mean Absolute Percentage Deviation (MAPD), is usually expressed as a percentage, +#' and is defined by the formula: +#' \deqn{ \frac{100}{n} \sum_{t=1}^{n} | \frac {y_{t}-yhat_{t}}{y_{t}}| .} +#' +#' @param m Prophet object. Default NULL +#' @param df A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL +#' +#' @return metrics value (numeric); `all_metrics` returns a one-row dataframe with one column per metric +#' +#'@examples +#'\dontrun{ +#' # Create example model +#' library(readr) +#' library(prophet) +#' df <- read_csv('../tests/testthat/data.csv') +#' m <- prophet(df) +#' future <- make_future_dataframe(m, periods = 365) +#' forecast <- predict(m, future) +#' all_metrics(m) +#' df.cv <- cross_validation(m, horizon = 100, units = 'days') +#' all_metrics(df = df.cv) +#' # You can check your model's accuracy using me, mse, rmse, etc. +#' print(rmse(m)) +#'} +#' @name metrics +NULL + +#' Prepare dataframe for metrics calculation. +#' +#' @param m Prophet object. 
Default NULL
+#' @param df A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL
+#'
+#' @return A dataframe with only the columns y and yhat, without the rows that contain NA.
+#'
+#' @keywords internal
+create_metric_data <- function(m=NULL, df=NULL)
+{
+  if(is.null(m) && is.null(df))
+  {
+    stop("You have to specify one of `m` and `df` at least.")
+  }
+  if(!is.null(m) && !is.null(df))
+  {
+    warning("You specify both of `m` and `df`. `df` is used for metrics calculation.")
+  }
+
+  data <- if(!is.null(df)) {
+    # `df` always wins over `m`, as announced by the warning above.
+    df
+  } else if(inherits(m, "prophet")) {
+    # Only a model is given: join its history with predict(m, NULL) on `ds`.
+    dplyr::inner_join(m$history, predict(m, NULL), by="ds")
+  } else {
+    # Without this branch `data` would silently be NULL and the failure would
+    # only surface later as an unrelated column-selection error.
+    stop("`m` must be a prophet object.")
+  }
+
+  if(!is.data.frame(data))
+  {
+    stop("`df` must be a dataframe.")
+  }
+  missing_columns <- setdiff(c("y", "yhat"), names(data))
+  if(length(missing_columns) > 0)
+  {
+    stop("Metric data has no column named: ", paste(missing_columns, collapse=", "))
+  }
+
+  # Subsetting by name needs no non-standard evaluation, so `y` and `yhat`
+  # never have to be declared as global variables.
+  stats::na.omit(data[, c("y", "yhat"), drop=FALSE])
+}
+
+#' Meta function to make the function which evaluates a metric.
+#'
+#' @param metrics A function of the form `metrics(y, yhat)` which returns the metric value.
+#'
+#' @return A function with the arguments `m` and `df` which prepares the metric data with `create_metric_data` and applies `metrics` to its y and yhat columns.
+#'
+#' @keywords internal
+make_metrics_function <- function(metrics)
+{
+  # Evaluate the argument now so that the returned closure does not depend on
+  # lazy evaluation of `metrics`.
+  force(metrics)
+  function(m=NULL, df=NULL)
+  {
+    data <- create_metric_data(m, df)
+    metrics(data$y, data$yhat)
+  }
+}
+
+#' @rdname metrics
+#' @export
+me <- make_metrics_function(function(y, yhat){mean(y - yhat)})
+
+#' @rdname metrics
+#' @export
+mse <- make_metrics_function(function(y, yhat){mean((y - yhat)^2)})
+
+#' @rdname metrics
+#' @export
+rmse <- make_metrics_function(function(y, yhat){sqrt(mean((y - yhat)^2))})
+
+#' @rdname metrics
+#' @export
+mae <- make_metrics_function(function(y, yhat){mean(abs(y - yhat))})
+
+# The two percentage metrics divide by `y`, so an actual value of zero makes
+# the result Inf or NaN.
+
+#' @rdname metrics
+#' @export
+mpe <- make_metrics_function(function(y, yhat){100*mean((y - yhat)/y)})
+
+#' @rdname metrics
+#' @export
+mape <- make_metrics_function(function(y, yhat){100*mean(abs((y - yhat)/y))})
+
+#' @rdname metrics
+#' @export
+all_metrics <- function(m=NULL, df=NULL)
+{
+  # Prepare the y/yhat data once, so that a model is predicted a single time
+  # and the warning about giving both `m` and `df` is raised once instead of
+  # once per metric.
+  data <- create_metric_data(m, df)
+  # Name every metric function; the names become the columns of the result
+  metrics <- rlang::set_names(
+    list(me, mse, rmse, mae, mpe, mape),
+    c("me", "mse", "rmse", "mae", "mpe", "mape")
+  )
+  # Evaluate each metric on the prepared data and bind the named values into
+  # a one-row dataframe with one column per metric
+  values <- purrr::map(metrics, function(metric) metric(df=data))
+  dplyr::bind_rows(values)
+}
diff --git a/R/man/create_metric_data.Rd b/R/man/create_metric_data.Rd
new file mode 100644
index 0000000..6dc5e9d
--- /dev/null
+++ b/R/man/create_metric_data.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/metrics.R
+\name{create_metric_data}
+\alias{create_metric_data}
+\title{Prepare dataframe for metrics calculation.}
+\usage{
+create_metric_data(m = NULL, df = NULL)
+}
+\arguments{
+\item{m}{Prophet object. Default NULL}
+
+\item{df}{A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL}
+}
+\value{
+A dataframe with only the columns y and yhat, without the rows that contain NA.
+}
+\description{
+Prepare dataframe for metrics calculation.
+}
+\keyword{internal}
diff --git a/R/man/metrics.Rd b/R/man/metrics.Rd
new file mode 100644
index 0000000..96d74c2
--- /dev/null
+++ b/R/man/metrics.Rd
@@ -0,0 +1,83 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/metrics.R
+\name{metrics}
+\alias{metrics}
+\alias{me}
+\alias{mse}
+\alias{rmse}
+\alias{mae}
+\alias{mpe}
+\alias{mape}
+\alias{all_metrics}
+\title{Metrics for Time Series Forecasts}
+\usage{
+me(m = NULL, df = NULL)
+
+mse(m = NULL, df = NULL)
+
+rmse(m = NULL, df = NULL)
+
+mae(m = NULL, df = NULL)
+
+mpe(m = NULL, df = NULL)
+
+mape(m = NULL, df = NULL)
+
+all_metrics(m = NULL, df = NULL)
+}
+\arguments{
+\item{m}{Prophet object. Default NULL}
+
+\item{df}{A dataframe which is output of `simulated_historical_forecasts` or `cross_validation`. Default NULL}
+}
+\value{
+metrics value (numeric)
+}
+\description{
+A time-series forecast requires making a quantitative prediction of future values.
+After forecasting, we also have to measure the accuracy of the forecasts to check whether they serve our needs.
+Metrics for time series forecasts tell you how good your model is and help you determine which particular forecasting models work best.
+}
+\details{
+Here, as a notation, we assume that \eqn{y} is the actual value and \eqn{yhat} is the forecast value.
+ +Mean Error (ME, \code{me}) + +The Mean Error (ME) is defined by the formula: +\deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t}) .} + +Mean Squared Error (MSE, \code{mse}) + +The Mean Squared Error (MSE) is defined by the formula: +\deqn{ \frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2 .} + +Root Mean Square Error (RMSE, \code{rmse}) + +The Root Mean Square Error (RMSE) is defined by the formula: +\deqn{ \sqrt{\frac{1}{n} \sum_{t=1}^{n} (y_{t}-yhat_{t})^2} .} + +Mean Absolute Error (MAE, \code{mae}) + +The Mean Absolute Error (MAE) is defined by the formula: +\deqn{ \frac{1}{n} \sum_{t=1}^{n} | y_{t}-yhat_{t} | .} + +Mean Percentage Error (MPE, \code{mpe}) + +The Mean Percentage Error (MPE) is usually expressed as a percentage +and is defined by the formula: +\deqn{ \frac{100}{n} \sum_{t=1}^{n} \frac {y_{t}-yhat_{t}}{y_{t}} .} + +Mean Absolute Percentage Error (MAPE, \code{mape}) + +The Mean Absolute Percentage Error (MAPE), also known as Mean Absolute Percentage Deviation (MAPD), is usually expressed as a percentage, +and is defined by the formula: +\deqn{ \frac{100}{n} \sum_{t=1}^{n} | \frac {y_{t}-yhat_{t}}{y_{t}}| .} +} +\examples{ +\dontrun{ +# Create example model +library(readr) +df <- read_csv('../tests/testthat/data.csv') +m <- prophet(df) +# You can check your model's accuracy using me, mse, rmse, etc. 
+print(rmse(m))
+}
+}
diff --git a/R/tests/testthat/test_metrics.R b/R/tests/testthat/test_metrics.R
new file mode 100644
index 0000000..a592ed2
--- /dev/null
+++ b/R/tests/testthat/test_metrics.R
@@ -0,0 +1,60 @@
+library(prophet)
+context("Prophet metrics tests")
+
+DATA <- head(read.csv('data.csv'), 100)
+DATA$ds <- as.Date(DATA$ds)
+
+test_that("metrics_tests_using_model", {
+  # Create dummy model
+  m <- prophet(DATA)
+  # Create metric data the same way the package does: join the history with
+  # the prediction, keep y and yhat, then drop the rows that contain NA
+  forecast <- predict(m, NULL)
+  joined <- dplyr::inner_join(m$history, forecast, by="ds")
+  df <- na.omit(joined[, c("y", "yhat")])
+  # Check that every metric is equal to its definition
+  y <- df$y
+  yhat <- df$yhat
+  expect_equal(me(m), mean(y-yhat))
+  expect_equal(mse(m), mean((y-yhat)^2))
+  expect_equal(rmse(m), sqrt(mean((y-yhat)^2)))
+  expect_equal(mae(m), mean(abs(y-yhat)))
+  expect_equal(mpe(m), 100*mean((y-yhat)/y))
+  expect_equal(mape(m), 100*mean(abs((y-yhat)/y)))
+  answer <- data.frame(
+    me=me(m),
+    mse=mse(m),
+    rmse=rmse(m),
+    mae=mae(m),
+    mpe=mpe(m),
+    mape=mape(m)
+  )
+  # Compare as plain dataframes so that the check does not depend on the
+  # dataframe class returned by all_metrics
+  expect_equal(as.data.frame(all_metrics(m)), answer)
+})
+
+test_that("metrics_tests_using_simulated_historical_forecast", {
+  #skip_if_not(Sys.getenv('R_ARCH') != '/i386')
+  # Create dummy model
+  m <- prophet(DATA)
+  # Run simulated historical forecast
+  df <- simulated_historical_forecasts(m, horizon = 3, units = 'days', k = 2, period = 3)
+  # Check that every metric is equal to its definition
+  y <- df$y
+  yhat <- df$yhat
+  expect_equal(me(df=df), mean(y-yhat))
+  expect_equal(mse(df=df), mean((y-yhat)^2))
+  expect_equal(rmse(df=df), sqrt(mean((y-yhat)^2)))
+  expect_equal(mae(df=df), mean(abs(y-yhat)))
+  expect_equal(mpe(df=df), 100*mean((y-yhat)/y))
+  expect_equal(mape(df=df), 100*mean(abs((y-yhat)/y)))
+  answer <- data.frame(
+    me=me(df=df),
+    mse=mse(df=df),
+    rmse=rmse(df=df),
+    mae=mae(df=df),
+    mpe=mpe(df=df),
+    mape=mape(df=df)
+  )
+  expect_equal(as.data.frame(all_metrics(df=df)), answer)
+  # When both a model and a dataframe are given, a warning is raised and the
+  # dataframe is used
+  expect_warning(both <- me(m, df))
+  expect_equal(both, me(df=df))
+})
+
+test_that("metrics_tests_argument_validation", {
+  # Neither a model nor a dataframe is given
+  expect_error(me())
+  expect_error(all_metrics())
+  # Metrics of a small dataframe, checked against hand-computed values
+  # (the errors y - yhat are 0, -1 and 2)
+  df <- data.frame(y=c(1, 2, 4), yhat=c(1, 3, 2))
+  expect_equal(me(df=df), 1/3)
+  expect_equal(mse(df=df), 5/3)
+  expect_equal(rmse(df=df), sqrt(5/3))
+  expect_equal(mae(df=df), 1)
+  expect_equal(mpe(df=df), 0)
+  expect_equal(mape(df=df), 100/3)
+  # Rows with a missing value are ignored
+  df_na <- rbind(df, data.frame(y=NA, yhat=1))
+  expect_equal(mae(df=df_na), 1)
+})