From 496facb152ab2c4e59dfe4edb3df6390fdadaa6a Mon Sep 17 00:00:00 2001
From: Jack Dent
Date: Wed, 5 Feb 2020 13:10:01 -0500
Subject: [PATCH] Use np.percentile if array does not contain NaNs (#1311)

Co-authored-by: jackd-stripe <41304233+jackd-stripe@users.noreply.github.com>
---
 python/fbprophet/forecaster.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/python/fbprophet/forecaster.py b/python/fbprophet/forecaster.py
index 5804b5b..31f6648 100644
--- a/python/fbprophet/forecaster.py
+++ b/python/fbprophet/forecaster.py
@@ -1329,10 +1329,10 @@ class Prophet(object):
                 comp *= self.y_scale
             data[component] = np.nanmean(comp, axis=1)
             if self.uncertainty_samples:
-                data[component + '_lower'] = np.nanpercentile(
+                data[component + '_lower'] = self.percentile(
                     comp, lower_p, axis=1,
                 )
-                data[component + '_upper'] = np.nanpercentile(
+                data[component + '_upper'] = self.percentile(
                     comp, upper_p, axis=1,
                 )
         return pd.DataFrame(data)
@@ -1410,9 +1410,9 @@ class Prophet(object):
 
         series = {}
         for key in ['yhat', 'trend']:
-            series['{}_lower'.format(key)] = np.nanpercentile(
+            series['{}_lower'.format(key)] = self.percentile(
                 sim_values[key], lower_p, axis=1)
-            series['{}_upper'.format(key)] = np.nanpercentile(
+            series['{}_upper'.format(key)] = self.percentile(
                 sim_values[key], upper_p, axis=1)
 
         return pd.DataFrame(series)
@@ -1498,6 +1498,17 @@ class Prophet(object):
 
         return trend * self.y_scale + df['floor']
 
+    def percentile(self, a, *args, **kwargs):
+        """
+        We rely on np.nanpercentile in the rare instances where there
+        are a small number of bad samples with MCMC that contain NaNs.
+        However, since np.nanpercentile is far slower than np.percentile,
+        we only fall back to it if the array contains NaNs. See
+        https://github.com/facebook/prophet/issues/1310 for more details.
+        """
+        fn = np.nanpercentile if np.isnan(a).any() else np.percentile
+        return fn(a, *args, **kwargs)
+
     def make_future_dataframe(self, periods, freq='D', include_history=True):
         """Simulate the trend using the extrapolated generative model.