Add model_to_dict and model_from_dict functions (#1871)

This commit adds the model_to_dict and model_from_dict functions, using
all of the logic that previously lived in model_to_json and
model_from_json, and converting those functions to simply reuse the new
ones.

This is useful because the user may sometimes want to serialize the dict
in some other way (e.g. with another JSON serialization library such as
ujson or orjson, or with something entirely different).
This commit is contained in:
Ben Sully 2021-04-21 02:10:06 +01:00 committed by GitHub
parent 9f0ce77be5
commit 0616bfb5da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -38,6 +38,73 @@ NP_ARRAY = ['changepoints_t']
ORDEREDDICT = ['seasonalities', 'extra_regressors']
def model_to_dict(model):
    """Build a dictionary from a fitted Prophet model for JSON serialization.

    Model must be fitted. Skips Stan objects that are not needed for predict.
    The conversion can be undone with model_from_dict.

    Parameters
    ----------
    model: Prophet model object.

    Returns
    -------
    dict that can be used to serialize a Prophet model as JSON or loaded back
    into a Prophet model.

    Raises
    ------
    ValueError if model.history is None, i.e. the model has not been fit.
    """
    if model.history is None:
        raise ValueError(
            "This can only be used to serialize models that have already been fit."
        )

    # Attributes listed in SIMPLE_ATTRIBUTES are copied over unchanged.
    serialized = {name: getattr(model, name) for name in SIMPLE_ATTRIBUTES}

    # Each group of non-core types below is converted to a plain
    # str / float / list representation.
    for name in PD_SERIES:
        series = getattr(model, name)
        if series is None:
            serialized[name] = None
        else:
            serialized[name] = series.to_json(orient='split', date_format='iso')

    for name in PD_TIMESTAMP:
        serialized[name] = getattr(model, name).timestamp()

    for name in PD_TIMEDELTA:
        serialized[name] = getattr(model, name).total_seconds()

    for name in PD_DATAFRAME:
        frame = getattr(model, name)
        if frame is None:
            serialized[name] = None
        else:
            serialized[name] = frame.to_json(orient='table', index=False)

    for name in NP_ARRAY:
        serialized[name] = getattr(model, name).tolist()

    for name in ORDEREDDICT:
        mapping = getattr(model, name)
        # The key order is stored next to the mapping itself so that
        # model_from_dict can rebuild the OrderedDict in the same order.
        serialized[name] = [list(mapping.keys()), mapping]

    # fit_kwargs: numpy values under 'init' are converted to builtin types on
    # a deep copy, leaving the model's own fit_kwargs untouched. They do not
    # need to be converted back on deserializing.
    fit_kwargs = deepcopy(model.fit_kwargs)
    if 'init' in fit_kwargs:
        init_values = fit_kwargs['init']
        for key, value in init_values.items():
            if isinstance(value, np.ndarray):
                init_values[key] = value.tolist()
            elif isinstance(value, np.floating):
                init_values[key] = float(value)
    serialized['fit_kwargs'] = fit_kwargs

    # Params (Dict[str, np.ndarray])
    serialized['params'] = {
        key: array.tolist() for key, array in model.params.items()
    }

    # Attributes that are skipped: stan_fit, stan_backend
    serialized['__prophet_version'] = __version__
    return serialized
def model_to_json(model):
    """Serialize a Prophet model to json string.

    Model must be fitted. The model is first converted with model_to_dict and
    the resulting dictionary is then encoded with json.dumps.

    Parameters
    ----------
    model: Prophet model object.

    Returns
    -------
    json string that can be deserialized into a Prophet model.

    Raises
    ------
    ValueError (from model_to_dict) if the model has not been fit.
    """
    # All conversion logic lives in model_to_dict; this wrapper only adds the
    # JSON encoding step.
    return json.dumps(model_to_dict(model))
def model_from_dict(model_dict):
    """Recreate a Prophet model from a dictionary.

    Recreates models that were converted with model_to_dict.

    Parameters
    ----------
    model_dict: Dictionary containing model, created with model_to_dict.

    Returns
    -------
    Prophet model. Its stan_backend and stan_fit attributes are set to None.
    """
    # Imported here so the function does not rely on the module's import
    # header providing StringIO.
    from io import StringIO

    model = Prophet()  # We will overwrite all attributes set in init anyway
    # Simple types
    for attribute in SIMPLE_ATTRIBUTES:
        setattr(model, attribute, model_dict[attribute])
    for attribute in PD_SERIES:
        if model_dict[attribute] is None:
            setattr(model, attribute, None)
        else:
            # Passing a literal JSON string to read_json is deprecated in
            # recent pandas releases; a file-like wrapper works everywhere.
            s = pd.read_json(
                StringIO(model_dict[attribute]), typ='series', orient='split'
            )
            if s.name == 'ds':
                if len(s) == 0:
                    # An empty series has no datetime dtype yet, so the .dt
                    # accessor below would not be available without this.
                    s = pd.to_datetime(s)
                s = s.dt.tz_localize(None)
            setattr(model, attribute, s)
    for attribute in PD_TIMESTAMP:
        # utcfromtimestamp returns a tz-aware value on newer pandas versions;
        # tz_localize(None) keeps the result naive, matching the tz-stripped
        # 'ds' series above, and is a no-op when it is already naive.
        setattr(
            model,
            attribute,
            pd.Timestamp.utcfromtimestamp(model_dict[attribute]).tz_localize(None),
        )
    for attribute in PD_TIMEDELTA:
        setattr(model, attribute, pd.Timedelta(seconds=model_dict[attribute]))
    for attribute in PD_DATAFRAME:
        if model_dict[attribute] is None:
            setattr(model, attribute, None)
        else:
            df = pd.read_json(
                StringIO(model_dict[attribute]),
                typ='frame',
                orient='table',
                convert_dates=['ds'],
            )
            if attribute == 'train_component_cols':
                # Special handling because of named index column
                df.columns.name = 'component'
                df.index.name = 'col'
            setattr(model, attribute, df)
    for attribute in NP_ARRAY:
        setattr(model, attribute, np.array(model_dict[attribute]))
    for attribute in ORDEREDDICT:
        # Stored as [ordered key list, mapping]; rebuild in the saved order.
        key_list, unordered_dict = model_dict[attribute]
        setattr(
            model,
            attribute,
            OrderedDict((key, unordered_dict[key]) for key in key_list),
        )
    # Other attributes with special handling
    # fit_kwargs
    model.fit_kwargs = model_dict['fit_kwargs']
    # Params (Dict[str, np.ndarray])
    model.params = {k: np.array(v) for k, v in model_dict['params'].items()}
    # Skipped attributes
    model.stan_backend = None
    model.stan_fit = None
    return model
def model_from_json(model_json):
    """Deserialize a Prophet model from json string.

    The string is decoded with json.loads and the resulting dictionary is
    passed to model_from_dict, which rebuilds the model.

    Parameters
    ----------
    model_json: json string to decode; presumably one produced by
        model_to_json.

    Returns
    -------
    Prophet model.
    """
    # All reconstruction logic lives in model_from_dict; this wrapper only
    # adds the JSON decoding step.
    return model_from_dict(json.loads(model_json))