# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import inspect
import unicodedata
import warnings

import pandas as pd
import numpy as np

import holidays as hdays_part1
import fbprophet.hdays as hdays_part2


def generate_holidays_file():
    """Generate csv file of all possible holiday names, ds,
     and countries, year combination
    """
    years = np.arange(1995, 2045, 1)
    all_holidays = []
    # class names in holiday packages which are not countries
    class_to_exclude = set(['rd', 'datetime', 'date', 'HolidayBase', 'Calendar',
                            'LunarDate', 'timedelta', 'date'])

    class_list2 = inspect.getmembers(hdays_part2, inspect.isclass)
    country_set2 = set(list(zip(*class_list2))[0])
    country_set2 -= class_to_exclude
    for country in country_set2:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            temp = getattr(hdays_part2, country)(years=years)
        temp_df = pd.DataFrame(list(temp.items()),
                                columns=['ds', 'holiday'])
        temp_df['country'] = country
        all_holidays.append(temp_df)

    class_list1 = inspect.getmembers(hdays_part1, inspect.isclass)
    country_set1 = set(list(zip(*class_list1))[0])
    country_set1 -= class_to_exclude
    # Avoid overwrting holidays get from hdays_part2
    country_set1 -= country_set2
    for country in country_set1:
        temp = getattr(hdays_part1, country)(years=years)
        temp_df = pd.DataFrame(list(temp.items()),
                                columns=['ds', 'holiday'])
        temp_df['country'] = country
        all_holidays.append(temp_df)

    generated_holidays = pd.concat(all_holidays, axis=0, ignore_index=True)
    generated_holidays['year'] = generated_holidays.ds.apply(lambda x: x.year)
    generated_holidays.sort_values(['country', 'ds', 'holiday'], inplace=True)

    # The holidays often have utf-8 characters.
    # These are not allowed in R package data (they generate a NOTE).
    # TODO: revisit whether we want to do this lossy conversion.
    def utf8_to_ascii(text):
        return (
            unicodedata.normalize('NFD', text)
            .encode('ascii', 'ignore')
            .decode('ascii')
        )

    generated_holidays['holiday'] = generated_holidays['holiday'].apply(utf8_to_ascii)
    generated_holidays.to_csv("../R/data-raw/generated_holidays.csv", index=False)


if __name__ == "__main__":
    # execute only if run as a script
    generate_holidays_file()