mirror of
https://github.com/saymrwulf/zipline.git
synced 2026-05-16 21:10:11 +00:00
This commit adds initial support for international markets in the Pipeline API
via addition of the concept of "domains".
Significant changes on this branch include:
- Added `zipline.pipeline.domain` and the `Domain` interface.
- Added several canonical domain definitions:
  - GENERIC
  - US_EQUITIES
  - CA_EQUITIES
  - GB_EQUITIES
- Added `zipline.country` as a place to define canonical country code names.
- Made `PipelineLoader` an interface and added a new `domain` parameter to
  the signature of `load_adjusted_array`.
- Added a new system for "generic" and "specialized" DataSet and BoundColumn
objects.
- Added a new system for inferring a domain from a pipeline that contains a mix
of generic and non-generic terms.
- Reworked the built-in pricing dataset. USEquityPricing dataset is now a
specialized version of a generic EquityPricing dataset, and most built-in
factors are now implemented in terms of EquityPricing rather than
USEquityPricing.
- Removed `zipline.data.us_equity_pricing`.
- Moved BcolzDailyBar{Reader,Writer} to `zipline.data.bcolz_daily_bars`.
- Moved SQLiteAdjustment{Reader,Writer} to `zipline.data.adjustments`.
New dependencies added as part of this work:
- iso3166 (for canonical country code definitions)
- python-interface (for strict interface definitions).
See https://github.com/quantopian/zipline/issues/2265 for a full description of
the design for domains.
249 lines
7.7 KiB
Python
249 lines
7.7 KiB
Python
"""
|
|
Tests for zipline.pipeline.loaders.frame.DataFrameLoader.
|
|
"""
|
|
from unittest import TestCase

from mock import patch
from numpy import arange, ones
from numpy.testing import assert_array_equal
from pandas import (
    DataFrame,
    DatetimeIndex,
    Int64Index,
    date_range,
)
from trading_calendars import get_calendar

from zipline.lib.adjustment import (
    ADD,
    Float64Add,
    Float64Multiply,
    Float64Overwrite,
    MULTIPLY,
    OVERWRITE,
)
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.loaders.frame import DataFrameLoader
|
|
|
|
|
|
class DataFrameLoaderTestCase(TestCase):
    """Tests for DataFrameLoader backed by a small in-memory baseline frame.

    Every test works on the same grid built in ``setUp``: ``ndates`` NYSE
    trading days starting at 2014-01-02 by ``nsids`` integer sids.
    """

    def setUp(self):
        """Build the shared sids, dates and all-True mask used by each test."""
        self.trading_day = get_calendar("NYSE").day

        self.nsids = 5
        self.ndates = 20

        self.sids = Int64Index(range(self.nsids))
        # ``date_range`` rather than ``DatetimeIndex(start=..., periods=...)``:
        # the range-generating constructor form was deprecated in pandas 0.24
        # and removed in 1.0, while ``date_range`` produces the same index on
        # both old and new versions.
        self.dates = date_range(
            start='2014-01-02',
            freq=self.trading_day,
            periods=self.ndates,
        )

        self.mask = ones((len(self.dates), len(self.sids)), dtype=bool)

    def tearDown(self):
        # Nothing to clean up; the fixtures are plain in-memory objects.
        pass

    def _make_baseline(self):
        """Return a (dates x sids) frame holding 0 .. ndates * nsids - 1.

        Values are laid out row-major, so each cell is unique and a misplaced
        row or column shows up as a value mismatch in the assertions.
        """
        data = arange(self.ndates * self.nsids).reshape(
            self.ndates,
            self.nsids,
        )
        return DataFrame(data, index=self.dates, columns=self.sids)

    def test_bad_input(self):
        """A loader only accepts a request for exactly its own column."""
        loader = DataFrameLoader(
            USEquityPricing.close,
            self._make_baseline(),
        )

        with self.assertRaises(ValueError):
            # Wrong column.
            loader.load_adjusted_array(
                US_EQUITIES,
                [USEquityPricing.open],
                self.dates,
                self.sids,
                self.mask,
            )

        with self.assertRaises(ValueError):
            # Too many columns.
            loader.load_adjusted_array(
                US_EQUITIES,
                [USEquityPricing.open, USEquityPricing.close],
                self.dates,
                self.sids,
                self.mask,
            )

    def test_baseline(self):
        """Windows over an unadjusted load match slices of the baseline."""
        baseline = self._make_baseline()
        loader = DataFrameLoader(USEquityPricing.close, baseline)

        dates_slice = slice(None, 10, None)
        sids_slice = slice(1, 3, None)
        # The loader returns a mapping with one entry for the single
        # requested column; unpack its only value.
        [adj_array] = loader.load_adjusted_array(
            US_EQUITIES,
            [USEquityPricing.close],
            self.dates[dates_slice],
            self.sids[sids_slice],
            self.mask[dates_slice, sids_slice],
        ).values()

        requested = baseline.values[dates_slice, sids_slice]
        for idx, window in enumerate(adj_array.traverse(window_length=3)):
            expected = requested[idx:idx + 3]
            assert_array_equal(window, expected)

    def test_adjustments(self):
        """Only adjustments touching the requested dates/sids are emitted.

        Checks both ``format_adjustments`` directly and the ``data`` /
        ``adjustments`` keyword arguments that ``load_adjusted_array`` passes
        to ``AdjustedArray``.
        """
        baseline = self._make_baseline()

        # Use the dates from index 10 on and sids 1-3.
        dates_slice = slice(10, None, None)
        sids_slice = slice(1, 4, None)

        # Adjustments that should actually affect the output.
        relevant_adjustments = [
            {
                'sid': 1,
                'start_date': None,
                'end_date': self.dates[15],
                'apply_date': self.dates[16],
                'value': 0.5,
                'kind': MULTIPLY,
            },
            {
                'sid': 2,
                'start_date': self.dates[5],
                'end_date': self.dates[15],
                'apply_date': self.dates[16],
                'value': 1.0,
                'kind': ADD,
            },
            {
                'sid': 2,
                'start_date': self.dates[15],
                'end_date': self.dates[16],
                'apply_date': self.dates[17],
                'value': 1.0,
                'kind': ADD,
            },
            {
                'sid': 3,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': 99.0,
                'kind': OVERWRITE,
            },
        ]

        # These adjustments shouldn't affect the output.
        irrelevant_adjustments = [
            {  # Sid Not Requested
                'sid': 0,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Sid Unknown
                'sid': 9999,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date Not Requested
                'sid': 2,
                'start_date': self.dates[1],
                'end_date': self.dates[2],
                'apply_date': self.dates[3],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date Before Known Data
                'sid': 2,
                'start_date': self.dates[0] - (2 * self.trading_day),
                'end_date': self.dates[0] - self.trading_day,
                'apply_date': self.dates[0] - self.trading_day,
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date After Known Data
                'sid': 2,
                'start_date': self.dates[-1] + self.trading_day,
                'end_date': self.dates[-1] + (2 * self.trading_day),
                'apply_date': self.dates[-1] + (3 * self.trading_day),
                'value': -9999.0,
                'kind': OVERWRITE,
            },
        ]

        adjustments = DataFrame(relevant_adjustments + irrelevant_adjustments)
        loader = DataFrameLoader(
            USEquityPricing.close,
            baseline,
            adjustments=adjustments,
        )

        expected_baseline = baseline.iloc[dates_slice, sids_slice]

        formatted_adjustments = loader.format_adjustments(
            self.dates[dates_slice],
            self.sids[sids_slice],
        )
        # Keys and row/col bounds are expressed relative to the requested
        # slices: e.g. apply_date dates[16] is row 6 of dates[10:], and
        # sid 1 is column 0 of sids[1:4].
        expected_formatted_adjustments = {
            6: [
                Float64Multiply(
                    first_row=0,
                    last_row=5,
                    first_col=0,
                    last_col=0,
                    value=0.5,
                ),
                Float64Add(
                    first_row=0,
                    last_row=5,
                    first_col=1,
                    last_col=1,
                    value=1.0,
                ),
            ],
            7: [
                Float64Add(
                    first_row=5,
                    last_row=6,
                    first_col=1,
                    last_col=1,
                    value=1.0,
                ),
            ],
            8: [
                Float64Overwrite(
                    first_row=6,
                    last_row=7,
                    first_col=2,
                    last_col=2,
                    value=99.0,
                )
            ],
        }
        self.assertEqual(formatted_adjustments, expected_formatted_adjustments)

        mask = self.mask[dates_slice, sids_slice]
        # Patch AdjustedArray where the loader module looks it up so the
        # arguments it is constructed with can be inspected.
        with patch('zipline.pipeline.loaders.frame.AdjustedArray') as m:
            loader.load_adjusted_array(
                US_EQUITIES,
                columns=[USEquityPricing.close],
                dates=self.dates[dates_slice],
                sids=self.sids[sids_slice],
                mask=mask,
            )

        self.assertEqual(m.call_count, 1)

        # Only the keyword arguments of the single call are checked.
        _, kwargs = m.call_args
        assert_array_equal(kwargs['data'], expected_baseline.values)
        self.assertEqual(kwargs['adjustments'], expected_formatted_adjustments)
|