zipline/tests/data/test_fx.py
Scott Sanderson 2546708d0e MAINT: Forward-fill fx rates past file end.
If an FX rate query requests a date that's greater than the last date in the fx
rate file, forward-fill from the last value in the file rather than raising an
error.

We do this for a few reasons:

1. We'd like to gracefully handle the possibility of an FX rates file that's
   older than another input file.

2. Relative to other non-erroring behaviors, forward-filling is the simplest
   thing to implement. Specifically, it's what the implementation prior to this
   change would do naturally if there weren't an explicit check to prevent it.

3. For an FX rates file containing prices on a 24/5 calendar, some amount of
   forward-filling is required to handle any market with a non-weekday date.
2020-02-04 18:23:53 -05:00

286 lines
11 KiB
Python

import itertools
import pandas as pd
import numpy as np
from zipline.data.fx import DEFAULT_FX_RATE
from zipline.testing.predicates import assert_equal
import zipline.testing.fixtures as zp_fixtures
class _FXReaderTestCase(zp_fixtures.WithFXRates,
                        zp_fixtures.ZiplineTestCase):
    """
    Base class for testing FXRateReader implementations.

    To test a new FXRateReader implementation, subclass from this base class
    and implement the ``reader`` property, returning an FXRateReader that uses
    the data stored in ``cls.fx_rates``.
    """
    FX_RATES_START_DATE = pd.Timestamp('2014-01-01', tz='UTC')
    FX_RATES_END_DATE = pd.Timestamp('2014-01-31', tz='UTC')

    # Calendar to which exchange rates data is aligned.
    FX_RATES_CALENDAR = '24/5'

    # Currencies between which exchange rates can be calculated.
    FX_RATES_CURRENCIES = ["USD", "CAD", "GBP", "EUR"]

    # Fields for which exchange rate data is present.
    FX_RATES_RATE_NAMES = ["london_mid", "tokyo_mid"]

    # Field to be used on a lookup of `'default'`.
    FX_RATES_DEFAULT_RATE = 'london_mid'

    # Used by WithFXRates.
    @classmethod
    def make_fx_rates(cls, fields, currencies, sessions):
        """
        Build the synthetic rates used by this suite.

        As a side effect, stores the generated rates on the class as
        ``tokyo_mid_rates`` and ``london_mid_rates`` so that tests can compare
        reader output against them directly.

        Returns
        -------
        rates : dict
            Maps ``'london_mid'`` and ``'tokyo_mid'`` to the corresponding
            generated rates.
        """
        ndays = len(sessions)

        # Give each currency a timeseries of "true" values, and compute fx
        # rates as ratios between true values.
        reference = pd.DataFrame({
            'USD': np.linspace(1.0, 2.0, num=ndays),
            'CAD': np.linspace(2.0, 3.0, num=ndays),
            'GBP': np.linspace(3.0, 4.0, num=ndays),
            'EUR': np.linspace(4.0, 5.0, num=ndays),
        }, index=sessions, columns=currencies)

        cls.tokyo_mid_rates = cls.make_fx_rates_from_reference(reference)

        # Make london_mid different by adding +1 to reference values.
        cls.london_mid_rates = cls.make_fx_rates_from_reference(reference + 1)

        # This will be set as cls.fx_rates by WithFXRates.
        return {
            'london_mid': cls.london_mid_rates,
            'tokyo_mid': cls.tokyo_mid_rates,
        }

    @property
    def reader(self):
        """The FXRateReader under test. Subclasses must override this."""
        raise NotImplementedError("Must be implemented by test suite.")

    def test_scalar_lookup(self):
        # Check every (rate, quote, base, date) combination one cell at a
        # time, and check that the 2D, columnar, and scalar entry points all
        # agree with each other.
        reader = self.reader

        rates = self.FX_RATES_RATE_NAMES
        quotes = self.FX_RATES_CURRENCIES
        possible_bases = self.FX_RATES_CURRENCIES + [None]

        # Extend the queried range one day past each end of the data so that
        # out-of-range lookups are covered too.
        dates = pd.date_range(
            self.FX_RATES_START_DATE - pd.Timedelta('1 day'),
            self.FX_RATES_END_DATE + pd.Timedelta('1 day'),
        )
        cases = itertools.product(rates, quotes, possible_bases, dates)

        for rate, quote, base, dt in cases:
            dts = pd.DatetimeIndex([dt], tz='UTC')
            bases = np.array([base], dtype=object)

            result = reader.get_rates(rate, quote, bases, dts)
            assert_equal(result.shape, (1, 1))

            result_scalar = result[0, 0]

            # Once data is available, converting a currency to itself should
            # always give a rate of exactly 1.
            if dt >= self.FX_RATES_START_DATE and quote == base:
                assert_equal(result_scalar, 1.0)

            expected = self.get_expected_fx_rate_scalar(rate, quote, base, dt)
            assert_equal(result_scalar, expected)

            col_result = reader.get_rates_columnar(rate, quote, bases, dts)
            assert_equal(col_result, result.ravel())

            alt_result_scalar = reader.get_rate_scalar(rate, quote, base, dt)
            assert_equal(result_scalar, alt_result_scalar)

    def test_2d_lookup(self):
        # Seeded so that the "random" queries are the same on every run.
        rand = np.random.RandomState(42)

        # Extend the queried range two days past each end of the data.
        dates = pd.date_range(
            self.FX_RATES_START_DATE - pd.Timedelta('2 days'),
            self.FX_RATES_END_DATE + pd.Timedelta('2 days'),
        )
        rates = self.FX_RATES_RATE_NAMES + [DEFAULT_FX_RATE]
        possible_quotes = self.FX_RATES_CURRENCIES
        possible_bases = self.FX_RATES_CURRENCIES + [None]

        # For every combination of rate name and quote currency...
        for rate, quote in itertools.product(rates, possible_quotes):

            # Choose N random distinct days...
            for ndays in 1, 2, 7, 20:
                dts_raw = rand.choice(dates, ndays, replace=False)
                dts = pd.DatetimeIndex(dts_raw, tz='utc').sort_values()

                # Choose M random possibly-non-distinct currencies...
                for nbases in 1, 2, 10, 200:
                    bases = (
                        rand.choice(possible_bases, nbases, replace=True)
                        .astype(object)
                    )

                    # ...And check that we get the expected result when
                    # querying for those dates/currencies.
                    result = self.reader.get_rates(rate, quote, bases, dts)
                    expected = self.get_expected_fx_rates(
                        rate,
                        quote,
                        bases,
                        dts,
                    )
                    assert_equal(result, expected)

    def test_columnar_lookup(self):
        # Seeded so that the "random" queries are the same on every run.
        rand = np.random.RandomState(42)

        # Extend the queried range two days past each end of the data.
        dates = pd.date_range(
            self.FX_RATES_START_DATE - pd.Timedelta('2 days'),
            self.FX_RATES_END_DATE + pd.Timedelta('2 days'),
        )
        rates = self.FX_RATES_RATE_NAMES + [DEFAULT_FX_RATE]
        possible_quotes = self.FX_RATES_CURRENCIES
        possible_bases = self.FX_RATES_CURRENCIES + [None]
        reader = self.reader

        # For every combination of rate name and quote currency...
        for rate, quote in itertools.product(rates, possible_quotes):
            for N in 1, 2, 10, 200:
                # Choose N (date, base) pairs randomly with replacement.
                dts_raw = rand.choice(dates, N, replace=True)
                dts = pd.DatetimeIndex(dts_raw, tz='utc')
                bases = (
                    rand.choice(possible_bases, N, replace=True)
                    .astype(object)
                )

                # ... And check that we get the expected result when querying
                # for those dates/currencies.
                result = reader.get_rates_columnar(rate, quote, bases, dts)
                expected = self.get_expected_fx_rates_columnar(
                    rate,
                    quote,
                    bases,
                    dts,
                )
                assert_equal(result, expected)

    def test_load_everything(self):
        # Sanity check for the randomized tests above: check that we get
        # exactly the rates we set up in make_fx_rates if we query for their
        # indices.
        for currency in self.FX_RATES_CURRENCIES:
            tokyo_rates = self.tokyo_mid_rates[currency]
            tokyo_result = self.reader.get_rates(
                'tokyo_mid',
                currency,
                tokyo_rates.columns,
                tokyo_rates.index,
            )
            assert_equal(tokyo_result, tokyo_rates.values)

            london_rates = self.london_mid_rates[currency]
            london_result = self.reader.get_rates(
                'london_mid',
                currency,
                london_rates.columns,
                london_rates.index,
            )

            # FX_RATES_DEFAULT_RATE is 'london_mid', so a lookup of the
            # default rate should match an explicit lookup of london_mid.
            default_result = self.reader.get_rates(
                DEFAULT_FX_RATE,
                currency,
                london_rates.columns,
                london_rates.index,
            )
            assert_equal(london_result, default_result)
            assert_equal(london_result, london_rates.values)

    def test_read_before_start_date(self):
        # Reads from before the start of our data should emit NaN. We do this
        # because, for some Pipeline loaders, it's hard to put a lower bound on
        # input asof dates, so we end up making queries for asof_dates that
        # might be before the start of FX data. When that happens, we want to
        # emit NaN, but we don't want to fail.
        for bad_date in (self.FX_RATES_START_DATE - pd.Timedelta('1 day'),
                         self.FX_RATES_START_DATE - pd.Timedelta('1000 days')):

            for rate in self.FX_RATES_RATE_NAMES:
                quote = 'USD'
                bases = np.array(['CAD'], dtype=object)
                dts = pd.DatetimeIndex([bad_date])

                result = self.reader.get_rates(rate, quote, bases, dts)
                assert_equal(result.shape, (1, 1))
                assert_equal(np.nan, result[0, 0])

    def test_read_after_end_date(self):
        # Reads from **after** the end of our data, on the other hand, should
        # forward-fill from the last date for which we have data rather than
        # fail. This lets a reader gracefully handle an FX rates file that's
        # older than another input, and some amount of forward-filling is
        # needed anyway to serve non-weekday dates from data that's aligned
        # to a 24/5 calendar.
        for late_date in (self.FX_RATES_END_DATE + pd.Timedelta('1 day'),
                          self.FX_RATES_END_DATE + pd.Timedelta('1000 days')):

            for rate in self.FX_RATES_RATE_NAMES:
                quote = 'USD'
                bases = np.array(['CAD'], dtype=object)
                dts = pd.DatetimeIndex([late_date])

                result = self.reader.get_rates(rate, quote, bases, dts)
                assert_equal(result.shape, (1, 1))

                # The expected value is the rate as of the last day of data,
                # regardless of how far past the end the query is.
                expected = self.get_expected_fx_rate_scalar(
                    rate,
                    quote,
                    'CAD',
                    self.FX_RATES_END_DATE,
                )
                assert_equal(expected, result[0, 0])

    def test_read_unknown_base(self):
        # A base currency that isn't in FX_RATES_CURRENCIES ('XXX') and a
        # missing base (None) should both produce NaN.
        for rate in self.FX_RATES_RATE_NAMES:
            quote = 'USD'
            for unknown_base in 'XXX', None:
                bases = np.array([unknown_base], dtype=object)
                dts = pd.DatetimeIndex([self.FX_RATES_START_DATE])
                result = self.reader.get_rates(rate, quote, bases, dts)[0, 0]
                assert_equal(result, np.nan)
class InMemoryFXReaderTestCase(_FXReaderTestCase):
    """Run the shared FXRateReader test suite against the in-memory reader."""

    @property
    def reader(self):
        """The test case's ``in_memory_fx_rate_reader`` attribute."""
        in_memory_reader = self.in_memory_fx_rate_reader
        return in_memory_reader
class HDF5FXReaderTestCase(zp_fixtures.WithTmpDir,
                           _FXReaderTestCase):
    """
    Run the shared FXRateReader test suite against the reader returned by
    ``write_h5_fx_rates``.
    """

    @classmethod
    def init_class_fixtures(cls):
        # Parent fixtures must be initialized first: ``cls.tmpdir`` and
        # ``cls.write_h5_fx_rates`` are used below.
        super(HDF5FXReaderTestCase, cls).init_class_fixtures()

        # Write the rates into the temporary directory, and keep the result
        # around to serve as the reader under test.
        cls.h5_fx_reader = cls.write_h5_fx_rates(
            cls.tmpdir.getpath('fx_rates.h5'),
        )

    @property
    def reader(self):
        """The reader created in ``init_class_fixtures``."""
        h5_reader = self.h5_fx_reader
        return h5_reader
class FastGetLocTestCase(zp_fixtures.ZiplineTestCase):
    """
    Check that ``fast_get_loc_ffilled`` agrees with
    ``DatetimeIndex.get_loc(..., method='ffill')``.
    """

    def test_fast_get_loc_ffilled(self):
        dts = pd.to_datetime([
            '2014-01-02',
            '2014-01-03',
            # Skip 2014-01-04
            '2014-01-05',
            '2014-01-06',
        ])

        # Cover exact matches, the skipped date in the middle, and dates past
        # the last entry.
        for dt in pd.date_range('2014-01-02', '2014-01-08'):
            result = zp_fixtures.fast_get_loc_ffilled(dts.values, dt.asm8)
            expected = dts.get_loc(dt, method='ffill')
            assert_equal(result, expected)

        # A date before the first entry has nothing to forward-fill from, so
        # both implementations should raise.
        before_first = pd.Timestamp('2014-01-01')

        with self.assertRaises(KeyError):
            dts.get_loc(before_first, method='ffill')

        # Use the same argument types (ndarray of datetime64 plus a datetime64
        # scalar) as the loop above, so that the error path is exercised on
        # the same kind of input as the success path.
        with self.assertRaises(KeyError):
            zp_fixtures.fast_get_loc_ffilled(dts.values, before_first.asm8)