From 0616bfb5daa6888e9665bba1f95d9d67e91fed66 Mon Sep 17 00:00:00 2001 From: Ben Sully Date: Wed, 21 Apr 2021 02:10:06 +0100 Subject: [PATCH] Add model_to_dict and model_from_dict functions (#1871) This commit adds the model_to_dict and model_from_dict functions, using all of the logic that previously lived in model_to_json and model_from_json, and converting those functions to simply reuse the new ones. This is useful because sometimes the user may want to serialize the dict in some other way (e.g. another JSON serialization library such as ujson or orjson, or something entirely different). --- python/prophet/serialize.py | 224 +++++++++++++++++++++--------------- 1 file changed, 130 insertions(+), 94 deletions(-) diff --git a/python/prophet/serialize.py b/python/prophet/serialize.py index 71e2300..85140ee 100644 --- a/python/prophet/serialize.py +++ b/python/prophet/serialize.py @@ -38,6 +38,73 @@ NP_ARRAY = ['changepoints_t'] ORDEREDDICT = ['seasonalities', 'extra_regressors'] +def model_to_dict(model): + """Convert a Prophet model to a dictionary suitable for JSON serialization. + + Model must be fitted. Skips Stan objects that are not needed for predict. + + Can be reversed with model_from_dict. + + Parameters + ---------- + model: Prophet model object. + + Returns + ------- + dict that can be used to serialize a Prophet model as JSON or loaded back + into a Prophet model. + """ + if model.history is None: + raise ValueError( + "This can only be used to serialize models that have already been fit." + ) + + model_dict = { + attribute: getattr(model, attribute) for attribute in SIMPLE_ATTRIBUTES + } + # Handle attributes of non-core types + for attribute in PD_SERIES: + if getattr(model, attribute) is None: + model_dict[attribute] = None + else: + model_dict[attribute] = getattr(model, attribute).to_json( + orient='split', date_format='iso' + ) + for attribute in PD_TIMESTAMP: + model_dict[attribute] = getattr(model, attribute).timestamp() + for attribute in PD_TIMEDELTA: + model_dict[attribute] = getattr(model, attribute).total_seconds() + for attribute in PD_DATAFRAME: + if getattr(model, attribute) is None: + model_dict[attribute] = None + else: + model_dict[attribute] = getattr(model, attribute).to_json(orient='table', index=False) + for attribute in NP_ARRAY: + model_dict[attribute] = getattr(model, attribute).tolist() + for attribute in ORDEREDDICT: + model_dict[attribute] = [ + list(getattr(model, attribute).keys()), + getattr(model, attribute), + ] + # Other attributes with special handling + # fit_kwargs -> Transform any numpy types before serializing. + # They do not need to be transformed back on deserializing. + fit_kwargs = deepcopy(model.fit_kwargs) + if 'init' in fit_kwargs: + for k, v in fit_kwargs['init'].items(): + if isinstance(v, np.ndarray): + fit_kwargs['init'][k] = v.tolist() + elif isinstance(v, np.floating): + fit_kwargs['init'][k] = float(v) + model_dict['fit_kwargs'] = fit_kwargs + + # Params (Dict[str, np.ndarray]) + model_dict['params'] = {k: v.tolist() for k, v in model.params.items()} + # Attributes that are skipped: stan_fit, stan_backend + model_dict['__prophet_version'] = __version__ + return model_dict + + def model_to_json(model): """Serialize a Prophet model to json string. @@ -53,57 +120,70 @@ def model_to_json(model): ------- json string that can be deserialized into a Prophet model. """ - if model.history is None: - raise ValueError( - "This can only be used to serialize models that have already been fit." - ) - - model_json = { - attribute: getattr(model, attribute) for attribute in SIMPLE_ATTRIBUTES - } - # Handle attributes of non-core types - for attribute in PD_SERIES: - if getattr(model, attribute) is None: - model_json[attribute] = None - else: - model_json[attribute] = getattr(model, attribute).to_json( - orient='split', date_format='iso' - ) - for attribute in PD_TIMESTAMP: - model_json[attribute] = getattr(model, attribute).timestamp() - for attribute in PD_TIMEDELTA: - model_json[attribute] = getattr(model, attribute).total_seconds() - for attribute in PD_DATAFRAME: - if getattr(model, attribute) is None: - model_json[attribute] = None - else: - model_json[attribute] = getattr(model, attribute).to_json(orient='table', index=False) - for attribute in NP_ARRAY: - model_json[attribute] = getattr(model, attribute).tolist() - for attribute in ORDEREDDICT: - model_json[attribute] = [ - list(getattr(model, attribute).keys()), - getattr(model, attribute), - ] - # Other attributes with special handling - # fit_kwargs -> Transform any numpy types before serializing. - # They do not need to be transformed back on deserializing. - fit_kwargs = deepcopy(model.fit_kwargs) - if 'init' in fit_kwargs: - for k, v in fit_kwargs['init'].items(): - if isinstance(v, np.ndarray): - fit_kwargs['init'][k] = v.tolist() - elif isinstance(v, np.floating): - fit_kwargs['init'][k] = float(v) - model_json['fit_kwargs'] = fit_kwargs - - # Params (Dict[str, np.ndarray]) - model_json['params'] = {k: v.tolist() for k, v in model.params.items()} - # Attributes that are skipped: stan_fit, stan_backend - model_json['__prophet_version'] = __version__ + model_json = model_to_dict(model) return json.dumps(model_json) +def model_from_dict(model_dict): + """Recreate a Prophet model from a dictionary. + + Recreates models that were converted with model_to_dict. + + Parameters + ---------- + model_dict: Dictionary containing model, created with model_to_dict. + + Returns + ------- + Prophet model. + """ + model = Prophet() # We will overwrite all attributes set in init anyway + # Simple types + for attribute in SIMPLE_ATTRIBUTES: + setattr(model, attribute, model_dict[attribute]) + for attribute in PD_SERIES: + if model_dict[attribute] is None: + setattr(model, attribute, None) + else: + s = pd.read_json(model_dict[attribute], typ='series', orient='split') + if s.name == 'ds': + if len(s) == 0: + s = pd.to_datetime(s) + s = s.dt.tz_localize(None) + setattr(model, attribute, s) + for attribute in PD_TIMESTAMP: + setattr(model, attribute, pd.Timestamp.utcfromtimestamp(model_dict[attribute])) + for attribute in PD_TIMEDELTA: + setattr(model, attribute, pd.Timedelta(seconds=model_dict[attribute])) + for attribute in PD_DATAFRAME: + if model_dict[attribute] is None: + setattr(model, attribute, None) + else: + df = pd.read_json(model_dict[attribute], typ='frame', orient='table', convert_dates=['ds']) + if attribute == 'train_component_cols': + # Special handling because of named index column + df.columns.name = 'component' + df.index.name = 'col' + setattr(model, attribute, df) + for attribute in NP_ARRAY: + setattr(model, attribute, np.array(model_dict[attribute])) + for attribute in ORDEREDDICT: + key_list, unordered_dict = model_dict[attribute] + od = OrderedDict() + for key in key_list: + od[key] = unordered_dict[key] + setattr(model, attribute, od) + # Other attributes with special handling + # fit_kwargs + model.fit_kwargs = model_dict['fit_kwargs'] + # Params (Dict[str, np.ndarray]) + model.params = {k: np.array(v) for k, v in model_dict['params'].items()} + # Skipped attributes + model.stan_backend = None + model.stan_fit = None + return model + + def model_from_json(model_json): """Deserialize a Prophet model from json string. @@ -117,49 +197,5 @@ def model_from_json(model_json): ------- Prophet model. """ - attr_dict = json.loads(model_json) - model = Prophet() # We will overwrite all attributes set in init anyway - # Simple types - for attribute in SIMPLE_ATTRIBUTES: - setattr(model, attribute, attr_dict[attribute]) - for attribute in PD_SERIES: - if attr_dict[attribute] is None: - setattr(model, attribute, None) - else: - s = pd.read_json(attr_dict[attribute], typ='series', orient='split') - if s.name == 'ds': - if len(s) == 0: - s = pd.to_datetime(s) - s = s.dt.tz_localize(None) - setattr(model, attribute, s) - for attribute in PD_TIMESTAMP: - setattr(model, attribute, pd.Timestamp.utcfromtimestamp(attr_dict[attribute])) - for attribute in PD_TIMEDELTA: - setattr(model, attribute, pd.Timedelta(seconds=attr_dict[attribute])) - for attribute in PD_DATAFRAME: - if attr_dict[attribute] is None: - setattr(model, attribute, None) - else: - df = pd.read_json(attr_dict[attribute], typ='frame', orient='table', convert_dates=['ds']) - if attribute == 'train_component_cols': - # Special handling because of named index column - df.columns.name = 'component' - df.index.name = 'col' - setattr(model, attribute, df) - for attribute in NP_ARRAY: - setattr(model, attribute, np.array(attr_dict[attribute])) - for attribute in ORDEREDDICT: - key_list, unordered_dict = attr_dict[attribute] - od = OrderedDict() - for key in key_list: - od[key] = unordered_dict[key] - setattr(model, attribute, od) - # Other attributes with special handling - # fit_kwargs - model.fit_kwargs = attr_dict['fit_kwargs'] - # Params (Dict[str, np.ndarray]) - model.params = {k: np.array(v) for k, v in attr_dict['params'].items()} - # Skipped attributes - model.stan_backend = None - model.stan_fit = None - return model + model_dict = json.loads(model_json) + return model_from_dict(model_dict)