mirror of
https://github.com/saymrwulf/prophet.git
synced 2026-05-14 20:48:08 +00:00
Add support for MinMax scaling (#2470)
This commit is contained in:
parent
2ac9e8fa76
commit
415bb6ef97
5 changed files with 187 additions and 18 deletions
|
|
@ -94,7 +94,8 @@ class Prophet(object):
|
|||
mcmc_samples=0,
|
||||
interval_width=0.80,
|
||||
uncertainty_samples=1000,
|
||||
stan_backend=None
|
||||
stan_backend=None,
|
||||
scaling: str = 'absmax',
|
||||
):
|
||||
self.growth = growth
|
||||
|
||||
|
|
@ -121,9 +122,13 @@ class Prophet(object):
|
|||
self.mcmc_samples = mcmc_samples
|
||||
self.interval_width = interval_width
|
||||
self.uncertainty_samples = uncertainty_samples
|
||||
if scaling not in ("absmax", "minmax"):
|
||||
raise ValueError("scaling must be one of 'absmax' or 'minmax'")
|
||||
self.scaling = scaling
|
||||
|
||||
# Set during fitting or by other methods
|
||||
self.start = None
|
||||
self.y_min = None
|
||||
self.y_scale = None
|
||||
self.logistic_floor = False
|
||||
self.t_scale = None
|
||||
|
|
@ -313,7 +318,10 @@ class Prophet(object):
|
|||
if 'floor' not in df:
|
||||
raise ValueError('Expected column "floor".')
|
||||
else:
|
||||
df['floor'] = 0
|
||||
if self.scaling == "absmax":
|
||||
df['floor'] = 0.
|
||||
elif self.scaling == "minmax":
|
||||
df['floor'] = self.y_min
|
||||
if self.growth == 'logistic':
|
||||
if 'cap' not in df:
|
||||
raise ValueError(
|
||||
|
|
@ -346,14 +354,25 @@ class Prophet(object):
|
|||
"""
|
||||
if not initialize_scales:
|
||||
return
|
||||
|
||||
if self.growth == 'logistic' and 'floor' in df:
|
||||
self.logistic_floor = True
|
||||
floor = df['floor']
|
||||
if self.scaling == "absmax":
|
||||
self.y_min = float((df['y'] - df['floor']).abs().min())
|
||||
self.y_scale = float((df['y'] - df['floor']).abs().max())
|
||||
elif self.scaling == "minmax":
|
||||
self.y_min = df['floor'].min()
|
||||
self.y_scale = float(df['cap'].max() - self.y_min)
|
||||
else:
|
||||
floor = 0.
|
||||
self.y_scale = float((df['y'] - floor).abs().max())
|
||||
if self.scaling == "absmax":
|
||||
self.y_min = 0.
|
||||
self.y_scale = float((df['y']).abs().max())
|
||||
elif self.scaling == "minmax":
|
||||
self.y_min = df['y'].min()
|
||||
self.y_scale = float(df['y'].max() - self.y_min)
|
||||
if self.y_scale == 0:
|
||||
self.y_scale = 1.0
|
||||
|
||||
self.start = df['ds'].min()
|
||||
self.t_scale = df['ds'].max() - self.start
|
||||
for name, props in self.extra_regressors.items():
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ SIMPLE_ATTRIBUTES = [
|
|||
'yearly_seasonality', 'weekly_seasonality', 'daily_seasonality',
|
||||
'seasonality_mode', 'seasonality_prior_scale', 'changepoint_prior_scale',
|
||||
'holidays_prior_scale', 'mcmc_samples', 'interval_width', 'uncertainty_samples',
|
||||
'y_scale', 'logistic_floor', 'country_holidays', 'component_modes'
|
||||
'y_scale', 'y_min', 'scaling', 'logistic_floor', 'country_holidays', 'component_modes'
|
||||
]
|
||||
|
||||
PD_SERIES = ['changepoints', 'history_dates', 'train_holiday_names']
|
||||
|
|
@ -129,6 +129,13 @@ def model_to_json(model):
|
|||
return json.dumps(model_json)
|
||||
|
||||
|
||||
def _handle_simple_attributes_backwards_compat(model_dict):
|
||||
"""Handle backwards compatibility for SIMPLE_ATTRIBUTES."""
|
||||
# prophet<=1.1.4: handle scaling parameters introduced in #2470
|
||||
if 'scaling' not in model_dict:
|
||||
model_dict['scaling'] = 'absmax'
|
||||
model_dict['y_min'] = 0.
|
||||
|
||||
def model_from_dict(model_dict):
|
||||
"""Recreate a Prophet model from a dictionary.
|
||||
|
||||
|
|
@ -144,6 +151,7 @@ def model_from_dict(model_dict):
|
|||
"""
|
||||
model = Prophet() # We will overwrite all attributes set in init anyway
|
||||
# Simple types
|
||||
_handle_simple_attributes_backwards_compat(model_dict)
|
||||
for attribute in SIMPLE_ATTRIBUTES:
|
||||
setattr(model, attribute, model_dict[attribute])
|
||||
for attribute in PD_SERIES:
|
||||
|
|
|
|||
|
|
@ -16,6 +16,12 @@ def subdaily_univariate_ts() -> pd.DataFrame:
|
|||
return pd.read_csv(Path(__file__).parent / "data2.csv", parse_dates=["ds"])
|
||||
|
||||
|
||||
@pytest.fixture(scope="package")
def large_numbers_ts() -> pd.DataFrame:
    """Load ``data3.csv``, a univariate time series with large values used to test scaling."""
    csv_path = Path(__file__).parent / "data3.csv"
    return pd.read_csv(csv_path, parse_dates=["ds"])
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
config.addinivalue_line("markers", "slow: mark tests as slow (include in run with --test-slow)")
|
||||
|
||||
|
|
|
|||
71
python/prophet/tests/data3.csv
Normal file
71
python/prophet/tests/data3.csv
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
ds,y
|
||||
2023-03-02,623031970.0
|
||||
2023-03-06,623032040.0
|
||||
2023-03-07,623032054.0
|
||||
2023-03-08,623032091.0
|
||||
2023-03-09,623032123.0
|
||||
2023-03-10,623032152.0
|
||||
2023-03-11,623032177.0
|
||||
2023-03-12,623032184.0
|
||||
2023-03-13,623032193.0
|
||||
2023-03-16,623032296.0
|
||||
2023-03-17,623032316.0
|
||||
2023-03-18,623032328.0
|
||||
2023-03-19,623032339.0
|
||||
2023-03-20,623032352.0
|
||||
2023-03-21,623032385.0
|
||||
2023-03-22,623032410.0
|
||||
2023-03-23,623032427.0
|
||||
2023-03-25,623032479.0
|
||||
2023-03-26,623032496.0
|
||||
2023-03-27,623032506.0
|
||||
2023-03-28,623032533.0
|
||||
2023-03-29,623032598.0
|
||||
2023-03-30,623032643.0
|
||||
2023-03-31,623032681.0
|
||||
2023-04-01,623032727.0
|
||||
2023-04-02,623032756.0
|
||||
2023-04-03,623032767.0
|
||||
2023-04-04,623032799.0
|
||||
2023-04-05,623032843.0
|
||||
2023-04-06,623032890.0
|
||||
2023-04-07,623032934.0
|
||||
2023-04-08,623032954.0
|
||||
2023-04-09,623032959.0
|
||||
2023-04-10,623032964.0
|
||||
2023-04-11,623032997.0
|
||||
2023-04-12,623033041.0
|
||||
2023-04-13,623033062.0
|
||||
2023-04-14,623033095.0
|
||||
2023-04-15,623033122.0
|
||||
2023-04-16,623033163.0
|
||||
2023-04-17,623033190.0
|
||||
2023-04-18,623033227.0
|
||||
2023-04-19,623033258.0
|
||||
2023-04-20,623033294.0
|
||||
2023-04-21,623033329.0
|
||||
2023-04-22,623033361.0
|
||||
2023-04-23,623033385.0
|
||||
2023-04-24,623033397.0
|
||||
2023-04-25,623033419.0
|
||||
2023-04-26,623033440.0
|
||||
2023-04-27,623033482.0
|
||||
2023-04-28,623033535.0
|
||||
2023-04-29,623033575.0
|
||||
2023-04-30,623033600.0
|
||||
2023-05-01,623033610.0
|
||||
2023-05-02,623033632.0
|
||||
2023-05-03,623033666.0
|
||||
2023-05-04,623033704.0
|
||||
2023-05-05,623033714.0
|
||||
2023-05-06,623033752.0
|
||||
2023-05-07,623033760.0
|
||||
2023-05-08,623033769.0
|
||||
2023-05-09,623033784.0
|
||||
2023-05-10,623033823.0
|
||||
2023-05-11,623033853.0
|
||||
2023-05-12,623034010.0
|
||||
2023-05-13,623034041.0
|
||||
2023-05-14,623034060.0
|
||||
2023-05-15,623034068.0
|
||||
2023-05-16,623034084.0
|
||||
|
|
|
@ -22,29 +22,53 @@ def rmse(predictions, targets) -> float:
|
|||
|
||||
|
||||
class TestProphetFitPredictDefault:
|
||||
def test_fit_predict(self, daily_univariate_ts, backend):
|
||||
@pytest.mark.parametrize(
|
||||
"scaling,expected",
|
||||
[("absmax", 10.64), ("minmax", 11.13)],
|
||||
ids=["absmax", "minmax"]
|
||||
)
|
||||
def test_fit_predict(self, daily_univariate_ts, backend, scaling, expected):
|
||||
test_days = 30
|
||||
train, test = train_test_split(daily_univariate_ts, test_days)
|
||||
forecaster = Prophet(stan_backend=backend)
|
||||
forecaster = Prophet(stan_backend=backend, scaling=scaling)
|
||||
forecaster.fit(train, seed=1237861298)
|
||||
np.random.seed(876543987)
|
||||
future = forecaster.make_future_dataframe(test_days, include_history=False)
|
||||
future = forecaster.predict(future)
|
||||
res = rmse(future["yhat"], test["y"])
|
||||
# this gives ~ 10.64
|
||||
assert 15 > res > 5, "backend: {}".format(forecaster.stan_backend)
|
||||
assert res == pytest.approx(expected, 0.02), "backend: {}".format(forecaster.stan_backend)
|
||||
|
||||
def test_fit_predict_newton(self, daily_univariate_ts, backend):
|
||||
@pytest.mark.parametrize(
|
||||
"scaling,expected",
|
||||
[("absmax", 23.44), ("minmax", 11.29)],
|
||||
ids=["absmax", "minmax"]
|
||||
)
|
||||
def test_fit_predict_newton(self, daily_univariate_ts, backend, scaling, expected):
|
||||
test_days = 30
|
||||
train, test = train_test_split(daily_univariate_ts, test_days)
|
||||
forecaster = Prophet(stan_backend=backend)
|
||||
forecaster = Prophet(stan_backend=backend, scaling=scaling)
|
||||
forecaster.fit(train, algorithm="Newton", seed=1237861298)
|
||||
np.random.seed(876543987)
|
||||
future = forecaster.make_future_dataframe(test_days, include_history=False)
|
||||
future = forecaster.predict(future)
|
||||
# this gives ~ 23.44 with absmax scaling and ~ 11.29 with minmax scaling
|
||||
res = rmse(future["yhat"], test["y"])
|
||||
assert res == pytest.approx(23.44, 0.01), "backend: {}".format(forecaster.stan_backend)
|
||||
assert res == pytest.approx(expected, 0.01), "backend: {}".format(forecaster.stan_backend)
|
||||
|
||||
@pytest.mark.parametrize(
    "scaling,expected",
    [("absmax", 127.01), ("minmax", 93.45)],
    ids=["absmax", "minmax"]
)
def test_fit_predict_large_numbers(self, large_numbers_ts, backend, scaling, expected):
    """Fit on a series with very large values and check the hold-out RMSE per scaling mode."""
    horizon = 30
    train, test = train_test_split(large_numbers_ts, horizon)
    model = Prophet(stan_backend=backend, scaling=scaling)
    model.fit(train, seed=1237861298)
    # Seed NumPy before prediction so the forecast is reproducible.
    np.random.seed(876543987)
    forecast = model.predict(
        model.make_future_dataframe(horizon, include_history=False)
    )
    error = rmse(forecast["yhat"], test["y"])
    assert error == pytest.approx(expected, 0.01), "backend: {}".format(model.stan_backend)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_fit_predict_sampling(self, daily_univariate_ts, backend):
|
||||
|
|
@ -185,6 +209,26 @@ class TestProphetDataPrep:
|
|||
m2.fit(train)
|
||||
assert m2.history["y_scaled"][0] == pytest.approx(1.0, 0.01)
|
||||
|
||||
def test_logistic_floor_minmax(self, daily_univariate_ts, backend):
    """Test the scaling of y with logistic growth and a floor/cap.

    Fits a minmax-scaled logistic model with a constant floor and cap, then
    shifts y, floor and cap by the same offset and fits again. In both fits
    the scaled history must stay strictly inside (0, 1), and the mean of
    ``y_scaled`` must agree between the two fits.
    """
    train, _ = train_test_split(daily_univariate_ts, daily_univariate_ts.shape[0] // 2)
    train["floor"] = 10.0
    train["cap"] = 80.0
    m = Prophet(growth="logistic", stan_backend=backend, scaling="minmax")
    m.fit(train)
    assert m.logistic_floor
    assert "floor" in m.history
    assert m.history["y_scaled"].min() > 0.0
    assert m.history["y_scaled"].max() < 1.0
    # Shift the data and its bounds by the same constant offset.
    for col in ["y", "floor", "cap"]:
        train[col] += 10.0
    m2 = Prophet(growth="logistic", stan_backend=backend, scaling="minmax")
    m2.fit(train)
    assert m2.history["y_scaled"].min() > 0.0
    assert m2.history["y_scaled"].max() < 1.0
    # Check that the scaling is the same. Compare with a tolerance rather
    # than exact equality: both sides are floating-point means computed from
    # differently shifted inputs, so they may differ by rounding error.
    assert m2.history["y_scaled"].mean() == pytest.approx(m.history["y_scaled"].mean())
|
||||
|
||||
def test_make_future_dataframe(self, daily_univariate_ts, backend):
|
||||
train = daily_univariate_ts.head(468 // 2)
|
||||
forecaster = Prophet(stan_backend=backend)
|
||||
|
|
@ -225,8 +269,28 @@ class TestProphetTrendComponent:
|
|||
assert k == 0
|
||||
assert m == pytest.approx(0.49335657, abs=1e-4)
|
||||
|
||||
def test_flat_growth(self, backend):
|
||||
m = Prophet(growth="flat", stan_backend=backend)
|
||||
def test_growth_init_minmax(self, daily_univariate_ts, backend):
    """Check the linear, logistic and flat growth initializers with minmax scaling."""
    train = daily_univariate_ts.iloc[:468].copy()
    train["cap"] = train["y"].max()
    model = Prophet(growth="logistic", stan_backend=backend, scaling="minmax")

    history = model.setup_dataframe(train, initialize_scales=True)

    # Linear initializer.
    linear_k, linear_m = model.linear_growth_init(history)
    assert linear_k == pytest.approx(0.4053406)
    assert linear_m == pytest.approx(0.3775322)

    # Logistic initializer.
    logistic_k, logistic_m = model.logistic_growth_init(history)
    assert logistic_k == pytest.approx(1.782523, abs=1e-4)
    assert logistic_m == pytest.approx(0.280521, abs=1e-4)

    # Flat initializer: the rate is zero.
    flat_k, flat_m = model.flat_growth_init(history)
    assert flat_k == 0
    assert flat_m == pytest.approx(0.32792770, abs=1e-4)
|
||||
|
||||
@pytest.mark.parametrize("scaling",["absmax","minmax"])
|
||||
def test_flat_growth(self, backend, scaling):
|
||||
m = Prophet(growth="flat", stan_backend=backend, scaling=scaling)
|
||||
x = np.linspace(0, 2 * np.pi, 8 * 7)
|
||||
history = pd.DataFrame(
|
||||
{
|
||||
|
|
@ -240,8 +304,8 @@ class TestProphetTrendComponent:
|
|||
m_ = m.params["m"][0, 0]
|
||||
k = m.params["k"][0, 0]
|
||||
assert k == pytest.approx(0.0)
|
||||
assert fcst["trend"].unique()[0] == pytest.approx(m_ * m.y_scale)
|
||||
assert np.round(m_ * m.y_scale) == 30.0
|
||||
assert fcst["trend"].unique()[0] == pytest.approx((m_ * m.y_scale) + m.y_min)
|
||||
assert np.round((m_ * m.y_scale) + m.y_min) == 30.0
|
||||
|
||||
def test_piecewise_linear(self, backend):
|
||||
model = Prophet(stan_backend=backend)
|
||||
|
|
@ -791,6 +855,7 @@ class TestProphetHolidays:
|
|||
assert sum(fcst["special_day"] == 0) == 575
|
||||
|
||||
|
||||
|
||||
class TestProphetRegressors:
|
||||
def test_added_regressors(self, daily_univariate_ts, backend):
|
||||
m = Prophet(stan_backend=backend)
|
||||
|
|
|
|||
Loading…
Reference in a new issue