From ade2f7a28afcbe55d90f47d06f291f68060fc2f9 Mon Sep 17 00:00:00 2001
From: Ben Letham <bletham@gmail.com>
Date: Wed, 17 Jun 2020 17:16:37 -0700
Subject: [PATCH] Initial take at negative binomial likelihood

---
 python/fbprophet/forecaster.py | 38 +++++++++++++++++++++++++++++++---
 python/stan/unix/prophet.stan  | 19 ++++++++++-------
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/python/fbprophet/forecaster.py b/python/fbprophet/forecaster.py
index e90835a..488e609 100644
--- a/python/fbprophet/forecaster.py
+++ b/python/fbprophet/forecaster.py
@@ -13,6 +13,7 @@ from datetime import timedelta, datetime
 
 import numpy as np
 import pandas as pd
+from scipy.stats import nbinom
 
 from fbprophet.make_holidays import get_holiday_names, make_holidays_df
 from fbprophet.models import StanBackendEnum
@@ -71,6 +72,7 @@ class Prophet(object):
     uncertainty_samples: Number of simulated draws used to estimate
         uncertainty intervals. Settings this value to 0 or False will disable
         uncertainty estimation and speed up the calculation.
+    likelihood: str "Gaussian", "NegBinomial", or "auto". Defaults "auto".
     stan_backend: str as defined in StanBackendEnum default: None - will try to
         iterate over all available backends and find the working one
     """
@@ -92,7 +94,8 @@ class Prophet(object):
             mcmc_samples=0,
             interval_width=0.80,
             uncertainty_samples=1000,
-            stan_backend=None
+            likelihood='auto',
+            stan_backend=None,
     ):
         self.growth = growth
 
@@ -119,6 +122,7 @@ class Prophet(object):
         self.mcmc_samples = mcmc_samples
         self.interval_width = interval_width
         self.uncertainty_samples = uncertainty_samples
+        self.likelihood = likelihood
 
         # Set during fitting or by other methods
         self.start = None
@@ -185,6 +189,11 @@ class Prophet(object):
             raise ValueError(
                 'seasonality_mode must be "additive" or "multiplicative"'
             )
+        if self.likelihood not in ['Gaussian', 'NegBinomial', 'auto']:
+            raise ValueError('Incorrect likelihood specification')
+        if self.likelihood == 'NegBinomial' and self.growth == 'linear':
+            # In the future we can get rid of the requirement for cap, but we must have a positive trend.
+            raise ValueError('Must use logistic trend with NegBinomial likelihood')
 
     def validate_column_name(self, name, check_holidays=True,
                              check_seasonalities=True, check_regressors=True):
@@ -1118,8 +1127,14 @@ class Prophet(object):
         self.fit_kwargs = deepcopy(kwargs)
 
         self.set_changepoints()
+        if self.likelihood == 'auto':
+            self.likelihood = 'Gaussian'  # TODO detect small-int data and use NB by default
 
         trend_indicator = {'linear': 0, 'logistic': 1, 'flat': 2}
+        likelihood_indicator = {'Gaussian': 0, 'NegBinomial': 1}
+        if self.likelihood == 'NegBinomial':
+            assert not self.logistic_floor  # some subtlety here that must be worked out
+            # Should verify that data are actually ints
 
         dat = {
             'T': history.shape[0],
@@ -1132,8 +1147,11 @@ class Prophet(object):
             'sigmas': prior_scales,
             'tau': self.changepoint_prior_scale,
             'trend_indicator': trend_indicator[self.growth],
+            'likelihood_indicator': likelihood_indicator[self.likelihood],
             's_a': component_cols['additive_terms'],
             's_m': component_cols['multiplicative_terms'],
+            'y_scale': self.y_scale,
+            'y_unscaled_int': history['y'].values.astype('int'),
         }
 
         if self.growth == 'linear':
@@ -1217,6 +1235,8 @@ class Prophet(object):
                 df2['trend'] * (1 + df2['multiplicative_terms'])
                 + df2['additive_terms']
         )
+        if self.likelihood == 'NegBinomial':
+            df2['yhat'] = self.y_scale * 0.01 * np.log(1 + np.exp(100 * df2['yhat'] / self.y_scale))
         return df2
 
     @staticmethod
@@ -1463,10 +1483,22 @@ class Prophet(object):
         Xb_m = np.matmul(seasonal_features.values, beta * s_m.values)
 
         sigma = self.params['sigma_obs'][iteration]
-        noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale
+
+        y_mean = trend * (1 + Xb_m) + Xb_a
+        if self.likelihood == 'Gaussian':
+            noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale
+            yhat = y_mean + noise
+        elif self.likelihood == 'NegBinomial':
+            # mu and phi in the Stan parameterization notation:
+            phi = 1 / sigma
+            mu = self.y_scale * 0.01 * np.log(1 + np.exp(100 * y_mean / self.y_scale))
+            # Convert to n and p in the scipy parameterization notation:
+            n = phi
+            p = phi / (mu + phi)
+            yhat = nbinom.rvs(n, p)
 
         return pd.DataFrame({
-            'yhat': trend * (1 + Xb_m) + Xb_a + noise,
+            'yhat': yhat,
             'trend': trend
         })
 
diff --git a/python/stan/unix/prophet.stan b/python/stan/unix/prophet.stan
index cb18633..989c5b1 100644
--- a/python/stan/unix/prophet.stan
+++ b/python/stan/unix/prophet.stan
@@ -96,8 +96,11 @@ data {
   vector[K] sigmas;     // Scale on seasonality prior
   real<lower=0> tau;    // Scale on changepoints prior
   int trend_indicator;  // 0 for linear, 1 for logistic, 2 for flat
+  int likelihood_indicator; // 0 for Gaussian, 1 for negative binomial
   vector[K] s_a;        // Indicator of additive features
   vector[K] s_m;        // Indicator of multiplicative features
+  real y_scale;         // Scale of y
+  int y_unscaled_int[T];    // unscaled y data
 }
 
 transformed data {
@@ -115,6 +118,7 @@ parameters {
 
 transformed parameters {
   vector[T] trend;
+  vector[T] y_mean;
   if (trend_indicator == 0) {
     trend = linear_trend(k, m, delta, t, A, t_change);
   } else if (trend_indicator == 1) {
@@ -122,6 +126,7 @@ transformed parameters {
   } else if (trend_indicator == 2) {
     trend = flat_trend(m, T);
   }
+  y_mean = trend .* (1 + X * (beta .* s_m)) + X * (beta .* s_a);
 }
 
 model {
@@ -129,14 +134,14 @@ model {
   k ~ normal(0, 5);
   m ~ normal(0, 5);
   delta ~ double_exponential(0, tau);
-  sigma_obs ~ normal(0, 0.5);
   beta ~ normal(0, sigmas);
 
   // Likelihood
-  y ~ normal(
-  trend
-  .* (1 + X * (beta .* s_m))
-  + X * (beta .* s_a),
-  sigma_obs
-  );
+  if (likelihood_indicator == 0) {
+    sigma_obs ~ normal(0, 0.5);
+    y ~ normal(y_mean, sigma_obs);
+  } else if (likelihood_indicator == 1) {
+    sigma_obs ~ normal(0, 10 * y_scale^2);
+    y_unscaled_int ~ neg_binomial_2(y_scale * 0.01 * log1p_exp(100 * y_mean), 1 / sigma_obs);
+  }
 }