diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index 5de5cd3..d834bb5 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -94,7 +94,8 @@ class Prophet(object): mcmc_samples=0, interval_width=0.80, uncertainty_samples=1000, - stan_backend=None + stan_backend=None, + scaling: str = 'absmax', ): self.growth = growth @@ -121,9 +122,13 @@ class Prophet(object): self.mcmc_samples = mcmc_samples self.interval_width = interval_width self.uncertainty_samples = uncertainty_samples + if scaling not in ("absmax", "minmax"): + raise ValueError("scaling must be one of 'absmax' or 'minmax'") + self.scaling = scaling # Set during fitting or by other methods self.start = None + self.y_min = None self.y_scale = None self.logistic_floor = False self.t_scale = None @@ -313,7 +318,10 @@ class Prophet(object): if 'floor' not in df: raise ValueError('Expected column "floor".') else: - df['floor'] = 0 + if self.scaling == "absmax": + df['floor'] = 0. + elif self.scaling == "minmax": + df['floor'] = self.y_min if self.growth == 'logistic': if 'cap' not in df: raise ValueError( @@ -346,14 +354,26 @@ class Prophet(object): """ if not initialize_scales: return + if self.growth == 'logistic' and 'floor' in df: self.logistic_floor = True - floor = df['floor'] + if self.scaling == "absmax": + self.y_min = float((df['y'] - df['floor']).abs().min()) + self.y_scale = float((df['y'] - df['floor']).abs().max()) + elif self.scaling == "minmax": + # Builtin float (as for y_scale): y_min is in serialize.SIMPLE_ATTRIBUTES and numpy ints are not JSON serializable + self.y_min = float(df['floor'].min()) + self.y_scale = float(df['cap'].max() - self.y_min) else: - floor = 0. - self.y_scale = float((df['y'] - floor).abs().max()) + if self.scaling == "absmax": + self.y_min = 0. 
+ self.y_scale = float((df['y']).abs().max()) + elif self.scaling == "minmax": + self.y_min = float(df['y'].min()) + self.y_scale = float(df['y'].max() - self.y_min) if self.y_scale == 0: self.y_scale = 1.0 + self.start = df['ds'].min() self.t_scale = df['ds'].max() - self.start for name, props in self.extra_regressors.items(): diff --git a/python/prophet/serialize.py b/python/prophet/serialize.py index 00fb8f2..0ff62d6 100644 --- a/python/prophet/serialize.py +++ b/python/prophet/serialize.py @@ -27,7 +27,7 @@ SIMPLE_ATTRIBUTES = [ 'yearly_seasonality', 'weekly_seasonality', 'daily_seasonality', 'seasonality_mode', 'seasonality_prior_scale', 'changepoint_prior_scale', 'holidays_prior_scale', 'mcmc_samples', 'interval_width', 'uncertainty_samples', - 'y_scale', 'logistic_floor', 'country_holidays', 'component_modes' + 'y_scale', 'y_min', 'scaling', 'logistic_floor', 'country_holidays', 'component_modes' ] PD_SERIES = ['changepoints', 'history_dates', 'train_holiday_names'] @@ -129,6 +129,13 @@ def model_to_json(model): return json.dumps(model_json) +def _handle_simple_attributes_backwards_compat(model_dict): + """Handle backwards compatibility for SIMPLE_ATTRIBUTES.""" + # prophet<=1.1.4: handle scaling parameters introduced in #2470 + if 'scaling' not in model_dict: + model_dict['scaling'] = 'absmax' + model_dict['y_min'] = 0. + def model_from_dict(model_dict): """Recreate a Prophet model from a dictionary. 
@@ -144,6 +151,7 @@ def model_from_dict(model_dict): """ model = Prophet() # We will overwrite all attributes set in init anyway # Simple types + _handle_simple_attributes_backwards_compat(model_dict) for attribute in SIMPLE_ATTRIBUTES: setattr(model, attribute, model_dict[attribute]) for attribute in PD_SERIES: diff --git a/python/prophet/tests/conftest.py b/python/prophet/tests/conftest.py index f3544e4..bc16821 100644 --- a/python/prophet/tests/conftest.py +++ b/python/prophet/tests/conftest.py @@ -16,6 +16,12 @@ def subdaily_univariate_ts() -> pd.DataFrame: return pd.read_csv(Path(__file__).parent / "data2.csv", parse_dates=["ds"]) +@pytest.fixture(scope="package") +def large_numbers_ts() -> pd.DataFrame: + """Univariate time series with large values to test scaling.""" + return pd.read_csv(Path(__file__).parent / "data3.csv", parse_dates=["ds"]) + + def pytest_configure(config): config.addinivalue_line("markers", "slow: mark tests as slow (include in run with --test-slow)") diff --git a/python/prophet/tests/data3.csv b/python/prophet/tests/data3.csv new file mode 100644 index 0000000..eac84a1 --- /dev/null +++ b/python/prophet/tests/data3.csv @@ -0,0 +1,71 @@ +ds,y +2023-03-02,623031970.0 +2023-03-06,623032040.0 +2023-03-07,623032054.0 +2023-03-08,623032091.0 +2023-03-09,623032123.0 +2023-03-10,623032152.0 +2023-03-11,623032177.0 +2023-03-12,623032184.0 +2023-03-13,623032193.0 +2023-03-16,623032296.0 +2023-03-17,623032316.0 +2023-03-18,623032328.0 +2023-03-19,623032339.0 +2023-03-20,623032352.0 +2023-03-21,623032385.0 +2023-03-22,623032410.0 +2023-03-23,623032427.0 +2023-03-25,623032479.0 +2023-03-26,623032496.0 +2023-03-27,623032506.0 +2023-03-28,623032533.0 +2023-03-29,623032598.0 +2023-03-30,623032643.0 +2023-03-31,623032681.0 +2023-04-01,623032727.0 +2023-04-02,623032756.0 +2023-04-03,623032767.0 +2023-04-04,623032799.0 +2023-04-05,623032843.0 +2023-04-06,623032890.0 +2023-04-07,623032934.0 +2023-04-08,623032954.0 +2023-04-09,623032959.0 
+2023-04-10,623032964.0 +2023-04-11,623032997.0 +2023-04-12,623033041.0 +2023-04-13,623033062.0 +2023-04-14,623033095.0 +2023-04-15,623033122.0 +2023-04-16,623033163.0 +2023-04-17,623033190.0 +2023-04-18,623033227.0 +2023-04-19,623033258.0 +2023-04-20,623033294.0 +2023-04-21,623033329.0 +2023-04-22,623033361.0 +2023-04-23,623033385.0 +2023-04-24,623033397.0 +2023-04-25,623033419.0 +2023-04-26,623033440.0 +2023-04-27,623033482.0 +2023-04-28,623033535.0 +2023-04-29,623033575.0 +2023-04-30,623033600.0 +2023-05-01,623033610.0 +2023-05-02,623033632.0 +2023-05-03,623033666.0 +2023-05-04,623033704.0 +2023-05-05,623033714.0 +2023-05-06,623033752.0 +2023-05-07,623033760.0 +2023-05-08,623033769.0 +2023-05-09,623033784.0 +2023-05-10,623033823.0 +2023-05-11,623033853.0 +2023-05-12,623034010.0 +2023-05-13,623034041.0 +2023-05-14,623034060.0 +2023-05-15,623034068.0 +2023-05-16,623034084.0 diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py index 4282400..728b531 100644 --- a/python/prophet/tests/test_prophet.py +++ b/python/prophet/tests/test_prophet.py @@ -22,29 +22,53 @@ def rmse(predictions, targets) -> float: class TestProphetFitPredictDefault: - def test_fit_predict(self, daily_univariate_ts, backend): + @pytest.mark.parametrize( + "scaling,expected", + [("absmax", 10.64), ("minmax", 11.13)], + ids=["absmax", "minmax"] + ) + def test_fit_predict(self, daily_univariate_ts, backend, scaling, expected): test_days = 30 train, test = train_test_split(daily_univariate_ts, test_days) - forecaster = Prophet(stan_backend=backend) + forecaster = Prophet(stan_backend=backend, scaling=scaling) forecaster.fit(train, seed=1237861298) np.random.seed(876543987) future = forecaster.make_future_dataframe(test_days, include_history=False) future = forecaster.predict(future) res = rmse(future["yhat"], test["y"]) - # this gives ~ 10.64 - assert 15 > res > 5, "backend: {}".format(forecaster.stan_backend) + assert res == pytest.approx(expected, 0.02), 
"backend: {}".format(forecaster.stan_backend) - def test_fit_predict_newton(self, daily_univariate_ts, backend): + @pytest.mark.parametrize( + "scaling,expected", + [("absmax", 23.44), ("minmax", 11.29)], + ids=["absmax", "minmax"] + ) + def test_fit_predict_newton(self, daily_univariate_ts, backend, scaling, expected): test_days = 30 train, test = train_test_split(daily_univariate_ts, test_days) - forecaster = Prophet(stan_backend=backend) + forecaster = Prophet(stan_backend=backend, scaling=scaling) forecaster.fit(train, algorithm="Newton", seed=1237861298) np.random.seed(876543987) future = forecaster.make_future_dataframe(test_days, include_history=False) future = forecaster.predict(future) - # this gives ~ 10.64 res = rmse(future["yhat"], test["y"]) - assert res == pytest.approx(23.44, 0.01), "backend: {}".format(forecaster.stan_backend) + assert res == pytest.approx(expected, 0.01), "backend: {}".format(forecaster.stan_backend) + + @pytest.mark.parametrize( + "scaling,expected", + [("absmax", 127.01), ("minmax", 93.45)], + ids=["absmax", "minmax"] + ) + def test_fit_predict_large_numbers(self, large_numbers_ts, backend, scaling, expected): + test_days = 30 + train, test = train_test_split(large_numbers_ts, test_days) + forecaster = Prophet(stan_backend=backend, scaling=scaling) + forecaster.fit(train, seed=1237861298) + np.random.seed(876543987) + future = forecaster.make_future_dataframe(test_days, include_history=False) + future = forecaster.predict(future) + res = rmse(future["yhat"], test["y"]) + assert res == pytest.approx(expected, 0.01), "backend: {}".format(forecaster.stan_backend) @pytest.mark.slow def test_fit_predict_sampling(self, daily_univariate_ts, backend): @@ -185,6 +209,26 @@ class TestProphetDataPrep: m2.fit(train) assert m2.history["y_scaled"][0] == pytest.approx(1.0, 0.01) + def test_logistic_floor_minmax(self, daily_univariate_ts, backend): + """Test the scaling of y with logistic growth and a floor/cap.""" + train, _ = 
train_test_split(daily_univariate_ts, daily_univariate_ts.shape[0] // 2) + train["floor"] = 10.0 + train["cap"] = 80.0 + m = Prophet(growth="logistic", stan_backend=backend, scaling="minmax") + m.fit(train) + assert m.logistic_floor + assert "floor" in m.history + assert m.history["y_scaled"].min() > 0.0 + assert m.history["y_scaled"].max() < 1.0 + for col in ["y", "floor", "cap"]: + train[col] += 10.0 + m2 = Prophet(growth="logistic", stan_backend=backend, scaling="minmax") + m2.fit(train) + assert m2.history["y_scaled"].min() > 0.0 + assert m2.history["y_scaled"].max() < 1.0 + # Check that the scaling is the same (up to floating-point rounding from the shift) + assert m2.history['y_scaled'].mean() == pytest.approx(m.history['y_scaled'].mean()) + def test_make_future_dataframe(self, daily_univariate_ts, backend): train = daily_univariate_ts.head(468 // 2) forecaster = Prophet(stan_backend=backend) @@ -225,8 +269,28 @@ class TestProphetTrendComponent: assert k == 0 assert m == pytest.approx(0.49335657, abs=1e-4) - def test_flat_growth(self, backend): - m = Prophet(growth="flat", stan_backend=backend) + def test_growth_init_minmax(self, daily_univariate_ts, backend): + model = Prophet(growth="logistic", stan_backend=backend, scaling="minmax") + train = daily_univariate_ts.iloc[:468].copy() + train["cap"] = train["y"].max() + + history = model.setup_dataframe(train, initialize_scales=True) + + k, m = model.linear_growth_init(history) + assert k == pytest.approx(0.4053406) + assert m == pytest.approx(0.3775322) + + k, m = model.logistic_growth_init(history) + assert k == pytest.approx(1.782523, abs=1e-4) + assert m == pytest.approx(0.280521, abs=1e-4) + + k, m = model.flat_growth_init(history) + assert k == 0 + assert m == pytest.approx(0.32792770, abs=1e-4) + + @pytest.mark.parametrize("scaling", ["absmax", "minmax"]) + def test_flat_growth(self, backend, scaling): + m = Prophet(growth="flat", stan_backend=backend, scaling=scaling) x = np.linspace(0, 2 * np.pi, 8 * 7) 
history = pd.DataFrame( { @@ -240,8 +304,8 @@ class 
TestProphetTrendComponent: m_ = m.params["m"][0, 0] k = m.params["k"][0, 0] assert k == pytest.approx(0.0) - assert fcst["trend"].unique()[0] == pytest.approx(m_ * m.y_scale) - assert np.round(m_ * m.y_scale) == 30.0 + assert fcst["trend"].unique()[0] == pytest.approx((m_ * m.y_scale) + m.y_min) + assert np.round((m_ * m.y_scale) + m.y_min) == 30.0 def test_piecewise_linear(self, backend): model = Prophet(stan_backend=backend) @@ -791,6 +855,7 @@ class TestProphetHolidays: assert sum(fcst["special_day"] == 0) == 575 + class TestProphetRegressors: def test_added_regressors(self, daily_univariate_ts, backend): m = Prophet(stan_backend=backend)