# zipline/tests/data/test_adjustments.py

import logbook
import numpy as np
import pandas as pd

from zipline.data.adjustments import (
    SQLiteAdjustmentReader,
    SQLiteAdjustmentWriter,
)
from zipline.data.in_memory_daily_bars import InMemoryDailyBarReader
from zipline.testing import parameter_space
from zipline.testing.predicates import assert_equal
from zipline.testing.fixtures import (
    WithInstanceTmpDir,
    WithTradingCalendars,
    WithLogger,
    ZiplineTestCase,
)


# Not-a-time sentinel; ``pd.NaT`` is the singleton that
# ``pd.Timestamp('nat')`` evaluates to.
nat = pd.NaT


class TestSQLiteAdjustmentsWriter(WithTradingCalendars,
                                  WithInstanceTmpDir,
                                  WithLogger,
                                  ZiplineTestCase):
    make_log_handler = logbook.TestHandler

    def init_instance_fixtures(self):
        """Run the inherited fixture setup, then choose the database path.

        ``self.db_path`` is set to the ``adjustments.db`` path obtained from
        ``self.instance_tmpdir``.
        """
        super(TestSQLiteAdjustmentsWriter, self).init_instance_fixtures()
        tmpdir = self.instance_tmpdir
        self.db_path = tmpdir.getpath('adjustments.db')

    def writer(self, session_bar_reader):
        """Create an adjustment writer targeting ``self.db_path``.

        The ``SQLiteAdjustmentWriter`` is built with ``overwrite=True`` and
        passed to ``enter_instance_context``; the result of that call is
        returned to the caller.
        """
        adjustment_writer = SQLiteAdjustmentWriter(
            self.db_path,
            session_bar_reader,
            overwrite=True,
        )
        return self.enter_instance_context(adjustment_writer)

    def component_dataframes(self, convert_dates=True):
        """Read the database at ``self.db_path`` back into component frames.

        ``convert_dates`` is forwarded unchanged to
        ``SQLiteAdjustmentReader.unpack_db_to_component_dfs``.
        """
        reader = SQLiteAdjustmentReader(self.db_path)
        with reader as opened:
            return opened.unpack_db_to_component_dfs(
                convert_dates=convert_dates,
            )

    def empty_in_memory_reader(self, dates, sids):
        """Build an in-memory daily bar reader whose fields are all NaN.

        Every OHLCV field maps to the same ``dates`` x ``sids`` NaN frame, and
        each sid is given the currency code ``'USD'``.
        """
        fields = ('open', 'high', 'low', 'close', 'volume')
        all_nan = pd.DataFrame(np.nan, index=dates, columns=sids)
        usd_codes = pd.Series(index=sids, data='USD')

        return InMemoryDailyBarReader(
            # one shared frame object for every field, as in a dict
            # comprehension over ``fields``
            dict.fromkeys(fields, all_nan),
            self.trading_calendar,
            currency_codes=usd_codes,
        )

    def writer_without_pricing(self, dates, sids):
        """Return a writer backed by an all-NaN bar reader."""
        bar_reader = self.empty_in_memory_reader(dates, sids)
        return self.writer(bar_reader)

    def in_memory_reader_for_close(self, close):
        """Build an in-memory daily bar reader with only close prices set.

        ``close`` supplies the ``'close'`` field; the remaining fields share a
        single NaN frame with the same index and columns. Every column of
        ``close`` is given the currency code ``'USD'``.
        """
        all_nan = pd.DataFrame(
            np.nan,
            index=close.index,
            columns=close.columns,
        )
        # 'close' is inserted first so key order matches an incremental build.
        frames = {'close': close}
        frames.update(dict.fromkeys(('open', 'high', 'low', 'volume'), all_nan))
        usd_codes = pd.Series(index=close.columns, data='USD')

        return InMemoryDailyBarReader(
            frames,
            self.trading_calendar,
            currency_codes=usd_codes,
        )

    def writer_from_close(self, close):
        """Return a writer backed by a bar reader built from ``close``."""
        bar_reader = self.in_memory_reader_for_close(close)
        return self.writer(bar_reader)

    def assert_all_empty(self, dfs):
        """Assert that every frame in the ``dfs`` mapping has zero rows."""
        for table_name, frame in dfs.items():
            message = '%s dataframe should be empty' % table_name
            assert_equal(len(frame), 0, msg=message)

    def test_calculate_dividend_ratio(self):
        """Check payouts, ratios and warnings produced for cash dividends.

        Dividends are written against a three-session close frame. The test
        then verifies that:

        * the stored dividend payouts match the input dividends,
        * ratios are stored only for the rows whose ratio can be computed
          and is positive, and
        * a warning is logged for every row that is dropped.
        """
        first_date_ix = 200
        dates = self.trading_calendar.all_sessions[
            first_date_ix:first_date_ix + 3
        ]

        before_pricing_data = \
            (dates[0] - self.trading_calendar.day).tz_convert(None)
        one_day_past_pricing_data = \
            (dates[-1] + self.trading_calendar.day).tz_convert(None)
        ten_days_past_pricing_data = \
            (dates[-1] + self.trading_calendar.day * 10).tz_convert(None)

        def T(n):
            return dates[n].tz_convert(None)

        close = pd.DataFrame(
            [[10.0, 0.5,   30.0],   # noqa
             [ 9.5, 0.4, np.nan],   # noqa
             [15.0, 0.6, np.nan]],  # noqa
            columns=[0, 1, 2],
            index=dates,
        )

        dividends = pd.DataFrame(
            [
                # an ex_date on or before the first date with pricing data
                # means that we cannot get the previous day's close, so we
                # should not expect to see this dividend in the output
                [0, before_pricing_data, 10],
                [0, T(0), 10],

                # previous price was 0.5, meaning the dividend amount
                # is greater than or equal to price and the ratio would be
                # negative. we should warn and drop this row
                [1, T(1), 0.51],

                # previous price was 0.4, meaning the dividend amount
                # is exactly equal to price and the ratio would be 0.
                # we should warn and drop this row
                [1, T(2), 0.4],

                # previous price is nan, so we cannot compute the ratio.
                # we should warn and drop this row
                [2, T(2), 10],

                # previous price was 10, expected ratio is 0.95
                [0, T(1), 0.5],

                # previous price was 0.4, expected ratio is 0.9
                [1, T(2), 0.04],

                # we shouldn't crash in the process of warning/dropping this
                # row even though it is past the range of `dates`
                [2, one_day_past_pricing_data, 0.1],
                [2, ten_days_past_pricing_data, 0.1],

            ],
            columns=['sid', 'ex_date', 'amount'],
        )

        # give every extra date field a unique date so that we can make sure
        # they appear unchanged in the dividends payouts
        ix = first_date_ix
        for col in 'declared_date', 'record_date', 'pay_date':
            extra_dates = self.trading_calendar.all_sessions[
                ix:ix + len(dividends)
            ]
            ix += len(dividends)
            dividends[col] = extra_dates

        self.writer_from_close(close).write(dividends=dividends)
        dfs = self.component_dataframes()
        dividend_payouts = dfs.pop('dividend_payouts')
        dividend_ratios = dfs.pop('dividends')
        self.assert_all_empty(dfs)

        payout_sort_key = ['sid', 'ex_date', 'amount']
        dividend_payouts = dividend_payouts.sort_values(payout_sort_key)
        dividend_payouts = dividend_payouts.reset_index(drop=True)

        # The expectation must come from the *input* frame: building it from
        # the frame read back out of the database would compare the output
        # with itself and could never fail.
        expected_dividend_payouts = dividends.sort_values(payout_sort_key)
        expected_dividend_payouts = expected_dividend_payouts.reset_index(
            drop=True,
        )
        assert_equal(dividend_payouts, expected_dividend_payouts)

        expected_dividend_ratios = pd.DataFrame(
            [[T(1), 0.95, 0],
             [T(2), 0.90, 1]],
            columns=['effective_date', 'ratio', 'sid'],
        )
        dividend_ratios = dividend_ratios.sort_values(
            ['effective_date', 'sid'],
        )
        dividend_ratios = dividend_ratios.reset_index(drop=True)
        assert_equal(dividend_ratios, expected_dividend_ratios)

        # one warning per dropped row
        expected_warnings = [
            "Couldn't compute ratio for dividend sid=2, ex_date=1990-10-18,"
            " amount=10.000",
            "Couldn't compute ratio for dividend sid=2, ex_date=1990-10-19,"
            " amount=0.100",
            "Couldn't compute ratio for dividend sid=2, ex_date=1990-11-01,"
            " amount=0.100",
            'Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-17,'
            ' amount=0.510',
            'Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-18,'
            ' amount=0.400',
        ]
        for message in expected_warnings:
            self.assertTrue(
                self.log_handler.has_warning(message),
                'missing expected warning: %r' % message,
            )

    def _test_identity(self, name):
        """Write a ratio table under ``name`` and expect it back unchanged.

        ``name`` is used both as the keyword passed to the writer's ``write``
        and as the key of the frame popped from the component dataframes; all
        remaining component frames must be empty.
        """
        sessions = self.trading_calendar.all_sessions.tz_convert(None)
        order = ['effective_date', 'sid', 'ratio']

        rows = [
            [sessions[0], 0.1, 1],
            [sessions[1], 2.0, 1],
            [sessions[0], 0.1, 2],
            [sessions[4], 2.0, 2],
            [sessions[8], 2.4, 2],
        ]
        unsorted = pd.DataFrame(
            rows,
            columns=['effective_date', 'ratio', 'sid'],
        )
        written = unsorted.sort_values(order)

        adjustment_writer = self.writer_without_pricing(
            sessions,
            np.arange(5),
        )
        adjustment_writer.write(**{name: written})

        tables = self.component_dataframes()
        read_back = tables.pop(name).sort_values(order)
        self.assert_all_empty(tables)

        assert_equal(written, read_back)

    def test_splits(self):
        """Run the identity round-trip check for the ``'splits'`` table."""
        self._test_identity(name='splits')

    def test_mergers(self):
        """Run the identity round-trip check for the ``'mergers'`` table."""
        self._test_identity(name='mergers')

    def test_stock_dividends(self):
        """Write stock dividends and expect the stored payouts to match.

        The frame passed as ``stock_dividends`` must come back unchanged under
        the ``'stock_dividend_payouts'`` key, with every other component frame
        empty.
        """
        sessions = self.trading_calendar.all_sessions.tz_convert(None)
        order = ['sid', 'ex_date', 'payment_sid', 'ratio']

        rows = [
            [0, sessions[0], 1.5, 1],
            [0, sessions[1], 0.5, 2],

            # the same asset has two stock dividends for different assets on
            # the same day
            [1, sessions[0], 1, 2],
            [1, sessions[0], 1.2, 3],
        ]
        written = pd.DataFrame(
            rows,
            columns=['sid', 'ex_date', 'ratio', 'payment_sid'],
        ).sort_values(order)

        # give every extra date field a unique date so that we can make sure
        # they appear unchanged in the dividends payouts: each column takes
        # the next consecutive, non-overlapping slice of sessions
        n_rows = len(written)
        extra_columns = ('declared_date', 'record_date', 'pay_date')
        for position, column in enumerate(extra_columns):
            start = position * n_rows
            written[column] = sessions[start:start + n_rows]

        adjustment_writer = self.writer_without_pricing(
            sessions,
            np.arange(5),
        )
        adjustment_writer.write(stock_dividends=written)

        tables = self.component_dataframes()
        read_back = tables.pop('stock_dividend_payouts').sort_values(order)
        self.assert_all_empty(tables)

        assert_equal(read_back, written)

    @parameter_space(convert_dates=[True, False])
    def test_empty_frame_dtypes(self, convert_dates):
        """Test that dataframe dtypes are preserved for empty tables.

        Date columns are expected to be ``M8[ns]`` when ``convert_dates`` is
        true and ``int64`` otherwise.
        """
        sessions = self.trading_calendar.all_sessions.tz_convert(None)

        date_dtype = np.dtype('M8[ns]' if convert_dates else 'int64')
        float_dtype = np.dtype('float64')
        int_dtype = np.dtype('int64')

        # Write all empty frames.
        self.writer_without_pricing(sessions, np.arange(5)).write()

        tables = self.component_dataframes(convert_dates)

        for frame in tables.values():
            assert_equal(len(frame), 0)

        ratio_table_dtypes = {
            'effective_date': date_dtype,
            'ratio': float_dtype,
            'sid': int_dtype,
        }
        dividend_payout_dtypes = {
            'sid': int_dtype,
            'ex_date': date_dtype,
            'declared_date': date_dtype,
            'record_date': date_dtype,
            'pay_date': date_dtype,
            'amount': float_dtype,
        }
        stock_dividend_payout_dtypes = {
            'sid': int_dtype,
            'ex_date': date_dtype,
            'declared_date': date_dtype,
            'record_date': date_dtype,
            'pay_date': date_dtype,
            'payment_sid': int_dtype,
            'ratio': float_dtype,
        }

        # (table name, expected column dtypes), checked in this order
        expectations = [
            ('splits', ratio_table_dtypes),
            ('mergers', ratio_table_dtypes),
            ('dividends', ratio_table_dtypes),
            ('dividend_payouts', dividend_payout_dtypes),
            ('stock_dividend_payouts', stock_dividend_payout_dtypes),
        ]
        for table_name, column_dtypes in expectations:
            result = tables[table_name].dtypes
            expected = pd.Series(column_dtypes).sort_index()
            assert_equal(result, expected)