zipline/tests/pipeline/test_frameload.py
Scott Sanderson e491ebef53 ENH: Initial support for international pipelines.
This commit adds initial support for international markets in the Pipeline API
via addition of the concept of "domains".

Significant changes on this branch include:

- Added `zipline.pipeline.domain` and the `Domain` interface.
  - Added several canonical domain definitions:
    - GENERIC
    - US_EQUITIES
    - CA_EQUITIES
    - GB_EQUITIES

- Added `zipline.country` as a place to define canonical country code names.

- Made `PipelineLoader` an interface and added new `domain` parameter to
  signature of `load_adjusted_array`.

- Added a new system for "generic" and "specialized" DataSet and BoundColumn
  objects.

- Added a new system for inferring a domain from a pipeline that contains a mix
  of generic and non-generic terms.

- Reworked the built-in pricing dataset. USEquityPricing dataset is now a
  specialized version of a generic EquityPricing dataset, and most built-in
  factors are now implemented in terms of EquityPricing rather than
  USEquityPricing.

- Removed `zipline.data.us_equity_pricing`.
  - Moved BcolzDailyBar{Reader,Writer} to `zipline.data.bcolz_daily_bars`.
  - Moved SQLiteAdjustment{Reader,Writer} to `zipline.data.adjustments`.

New dependencies added as part of this work:

- iso3166 (for canonical country code definitions)
- python-interface (for strict interface definitions).

See https://github.com/quantopian/zipline/issues/2265 for a full description of
the design for domains.
2018-09-10 10:05:39 -05:00

249 lines
7.7 KiB
Python

"""
Tests for zipline.pipeline.loaders.frame.DataFrameLoader.
"""
from unittest import TestCase
from mock import patch
from numpy import arange, ones
from numpy.testing import assert_array_equal
from pandas import (
DataFrame,
DatetimeIndex,
Int64Index,
)
from trading_calendars import get_calendar
from zipline.lib.adjustment import (
ADD,
Float64Add,
Float64Multiply,
Float64Overwrite,
MULTIPLY,
OVERWRITE,
)
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.loaders.frame import DataFrameLoader
class DataFrameLoaderTestCase(TestCase):
def setUp(self):
self.trading_day = get_calendar("NYSE").day
self.nsids = 5
self.ndates = 20
self.sids = Int64Index(range(self.nsids))
self.dates = DatetimeIndex(
start='2014-01-02',
freq=self.trading_day,
periods=self.ndates,
)
self.mask = ones((len(self.dates), len(self.sids)), dtype=bool)
def tearDown(self):
pass
def test_bad_input(self):
data = arange(100).reshape(self.ndates, self.nsids)
baseline = DataFrame(data, index=self.dates, columns=self.sids)
loader = DataFrameLoader(
USEquityPricing.close,
baseline,
)
with self.assertRaises(ValueError):
# Wrong column.
loader.load_adjusted_array(
US_EQUITIES,
[USEquityPricing.open],
self.dates,
self.sids,
self.mask,
)
with self.assertRaises(ValueError):
# Too many columns.
loader.load_adjusted_array(
US_EQUITIES,
[USEquityPricing.open, USEquityPricing.close],
self.dates,
self.sids,
self.mask,
)
def test_baseline(self):
data = arange(100).reshape(self.ndates, self.nsids)
baseline = DataFrame(data, index=self.dates, columns=self.sids)
loader = DataFrameLoader(USEquityPricing.close, baseline)
dates_slice = slice(None, 10, None)
sids_slice = slice(1, 3, None)
[adj_array] = loader.load_adjusted_array(
US_EQUITIES,
[USEquityPricing.close],
self.dates[dates_slice],
self.sids[sids_slice],
self.mask[dates_slice, sids_slice],
).values()
for idx, window in enumerate(adj_array.traverse(window_length=3)):
expected = baseline.values[dates_slice, sids_slice][idx:idx + 3]
assert_array_equal(window, expected)
def test_adjustments(self):
data = arange(100).reshape(self.ndates, self.nsids)
baseline = DataFrame(data, index=self.dates, columns=self.sids)
# Use the dates from index 10 on and sids 1-3.
dates_slice = slice(10, None, None)
sids_slice = slice(1, 4, None)
# Adjustments that should actually affect the output.
relevant_adjustments = [
{
'sid': 1,
'start_date': None,
'end_date': self.dates[15],
'apply_date': self.dates[16],
'value': 0.5,
'kind': MULTIPLY,
},
{
'sid': 2,
'start_date': self.dates[5],
'end_date': self.dates[15],
'apply_date': self.dates[16],
'value': 1.0,
'kind': ADD,
},
{
'sid': 2,
'start_date': self.dates[15],
'end_date': self.dates[16],
'apply_date': self.dates[17],
'value': 1.0,
'kind': ADD,
},
{
'sid': 3,
'start_date': self.dates[16],
'end_date': self.dates[17],
'apply_date': self.dates[18],
'value': 99.0,
'kind': OVERWRITE,
},
]
# These adjustments shouldn't affect the output.
irrelevant_adjustments = [
{ # Sid Not Requested
'sid': 0,
'start_date': self.dates[16],
'end_date': self.dates[17],
'apply_date': self.dates[18],
'value': -9999.0,
'kind': OVERWRITE,
},
{ # Sid Unknown
'sid': 9999,
'start_date': self.dates[16],
'end_date': self.dates[17],
'apply_date': self.dates[18],
'value': -9999.0,
'kind': OVERWRITE,
},
{ # Date Not Requested
'sid': 2,
'start_date': self.dates[1],
'end_date': self.dates[2],
'apply_date': self.dates[3],
'value': -9999.0,
'kind': OVERWRITE,
},
{ # Date Before Known Data
'sid': 2,
'start_date': self.dates[0] - (2 * self.trading_day),
'end_date': self.dates[0] - self.trading_day,
'apply_date': self.dates[0] - self.trading_day,
'value': -9999.0,
'kind': OVERWRITE,
},
{ # Date After Known Data
'sid': 2,
'start_date': self.dates[-1] + self.trading_day,
'end_date': self.dates[-1] + (2 * self.trading_day),
'apply_date': self.dates[-1] + (3 * self.trading_day),
'value': -9999.0,
'kind': OVERWRITE,
},
]
adjustments = DataFrame(relevant_adjustments + irrelevant_adjustments)
loader = DataFrameLoader(
USEquityPricing.close,
baseline,
adjustments=adjustments,
)
expected_baseline = baseline.iloc[dates_slice, sids_slice]
formatted_adjustments = loader.format_adjustments(
self.dates[dates_slice],
self.sids[sids_slice],
)
expected_formatted_adjustments = {
6: [
Float64Multiply(
first_row=0,
last_row=5,
first_col=0,
last_col=0,
value=0.5,
),
Float64Add(
first_row=0,
last_row=5,
first_col=1,
last_col=1,
value=1.0,
),
],
7: [
Float64Add(
first_row=5,
last_row=6,
first_col=1,
last_col=1,
value=1.0,
),
],
8: [
Float64Overwrite(
first_row=6,
last_row=7,
first_col=2,
last_col=2,
value=99.0,
)
],
}
self.assertEqual(formatted_adjustments, expected_formatted_adjustments)
mask = self.mask[dates_slice, sids_slice]
with patch('zipline.pipeline.loaders.frame.AdjustedArray') as m:
loader.load_adjusted_array(
US_EQUITIES,
columns=[USEquityPricing.close],
dates=self.dates[dates_slice],
sids=self.sids[sids_slice],
mask=mask,
)
self.assertEqual(m.call_count, 1)
args, kwargs = m.call_args
assert_array_equal(kwargs['data'], expected_baseline.values)
self.assertEqual(kwargs['adjustments'], expected_formatted_adjustments)