zipline/tests/test_data_portal.py
ernestoeperez88 5cda2ad542
Merge pull request #2309 from quantopian/float-precision-fix
BUG: Round values before truncating when writing bcolz minute/day tables.
2018-10-01 14:21:29 -07:00

589 lines
22 KiB
Python

#
# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from numpy import array, append, nan, full
from numpy.testing import assert_almost_equal
import pandas as pd
from pandas import Timedelta
from six import iteritems
from zipline.assets import Equity, Future
from zipline.data.data_portal import HISTORY_FREQUENCIES, OHLCV_FIELDS
from zipline.data.minute_bars import (
FUTURES_MINUTES_PER_DAY,
US_EQUITIES_MINUTES_PER_DAY,
)
from zipline.testing import parameter_space
from zipline.testing.fixtures import (
ZiplineTestCase,
WithTradingSessions,
WithDataPortal,
alias,
)
from zipline.testing.predicates import assert_equal
from zipline.utils.numpy_utils import float64_dtype
class DataPortalTestBase(WithDataPortal,
                         WithTradingSessions):
    """Shared fixture data and test cases for ``self.data_portal``.

    The ``make_*`` classmethods describe the equities (sids 1, 2, 3) and
    futures (sids 10000, 10001) whose bars the tests read back.  The class is
    not a test case by itself: the concrete classes in this module combine it
    with ``ZiplineTestCase``.
    """

    ASSET_FINDER_EQUITY_SIDS = (1, 2, 3)
    DIVIDEND_ASSET_SID = 3
    START_DATE = pd.Timestamp('2016-08-01')
    END_DATE = pd.Timestamp('2016-08-08')

    TRADING_CALENDAR_STRS = ('NYSE', 'us_futures')

    EQUITY_DAILY_BAR_SOURCE_FROM_MINUTE = True

    # Since the future with sid 10001 has a tick size of 0.0001, its prices
    # should be rounded out to 4 decimal places. To test that this rounding
    # occurs correctly, store its prices out to 5 decimal places by using a
    # multiplier of 100,000 when writing its values.
    OHLC_RATIOS_PER_SID = {10001: 100000}

    @staticmethod
    def _no_trade_bars(dts):
        """Return an OHLCV frame indexed by ``dts`` containing no trades.

        Every price column is NaN and ``volume`` is 0 for each entry of
        ``dts``.
        """
        return pd.DataFrame(
            {
                'open': full(len(dts), nan),
                'high': full(len(dts), nan),
                'low': full(len(dts), nan),
                'close': full(len(dts), nan),
                'volume': full(len(dts), 0),
            },
            index=dts,
        )

    @classmethod
    def make_root_symbols_info(cls):
        """Return the root-symbol table for the 'BAR' and 'BUZ' roots."""
        return pd.DataFrame({
            'root_symbol': ['BAR', 'BUZ'],
            'root_symbol_id': [1, 2],
            'exchange': ['CMES', 'CMES'],
        })

    @classmethod
    def make_futures_info(cls):
        """Return the metadata table for futures 10000 and 10001.

        Sid 10000 starts on the second 'us_futures' session and sid 10001 on
        the first; end, notice and expiration dates are all ``END_DATE``.
        """
        trading_sessions = cls.trading_sessions['us_futures']
        return pd.DataFrame({
            'sid': [10000, 10001],
            'root_symbol': ['BAR', 'BUZ'],
            'symbol': ['BARA', 'BUZZ'],
            'start_date': [trading_sessions[1], trading_sessions[0]],
            'end_date': [cls.END_DATE, cls.END_DATE],
            # TODO: Make separate from 'end_date'
            'notice_date': [cls.END_DATE, cls.END_DATE],
            'expiration_date': [cls.END_DATE, cls.END_DATE],
            'tick_size': [0.01, 0.0001],
            'multiplier': [500, 50000],
            'exchange': ['CMES', 'CMES'],
        })

    @classmethod
    def make_equity_minute_bar_data(cls):
        """Yield ``(sid, minute_bar_frame)`` pairs for the equity fixtures.

        Sid 1 gets four consecutive sessions: no trades, one trade on the
        first minute, four trades within the first six minutes, no trades.
        Sid 2 and ``DIVIDEND_ASSET_SID`` get the same constant bar on every
        minute of sid 1's index.
        """
        trading_calendar = cls.trading_calendars[Equity]

        # No data on first day.
        dts = trading_calendar.minutes_for_session(cls.trading_days[0])
        dfs = [cls._no_trade_bars(dts)]

        # Second day: a single bar on the first minute of the session.
        dts = trading_calendar.minutes_for_session(cls.trading_days[1])
        pad = full(len(dts) - 1, nan)
        dfs.append(pd.DataFrame(
            {
                'open': append(100.5, pad),
                'high': append(100.9, pad),
                'low': append(100.1, pad),
                'close': append(100.3, pad),
                # NOTE(review): volume is padded with nan here, whereas the
                # other sessions use 0 for minutes without trades -- confirm
                # this is intentional.
                'volume': append(1000, pad),
            },
            index=dts))

        # Third day: only the first six minutes are written; minutes 1-4
        # carry trades.
        dts = trading_calendar.minutes_for_session(cls.trading_days[2])
        dfs.append(pd.DataFrame(
            {
                'open': [nan, 103.50, 102.50, 104.50, 101.50, nan],
                'high': [nan, 103.90, 102.90, 104.90, 101.90, nan],
                'low': [nan, 103.10, 102.10, 104.10, 101.10, nan],
                'close': [nan, 103.30, 102.30, 104.30, 101.30, nan],
                'volume': [0, 1003, 1002, 1004, 1001, 0]
            },
            index=dts[:6]
        ))

        # Fourth day: no data again.
        dts = trading_calendar.minutes_for_session(cls.trading_days[3])
        dfs.append(cls._no_trade_bars(dts))

        asset1_df = pd.concat(dfs)
        yield 1, asset1_df

        # Prices carry four decimal places; test_price_rounding checks how
        # they are read back.
        asset2_df = pd.DataFrame(
            {
                'open': 1.0055,
                'high': 1.0059,
                'low': 1.0051,
                'close': 1.0055,
                'volume': 100,
            },
            index=asset1_df.index,
        )
        yield 2, asset2_df

        yield cls.DIVIDEND_ASSET_SID, asset2_df.copy()

    @classmethod
    def make_future_minute_bar_data(cls):
        """Yield ``(sid, minute_bar_frame)`` pairs for the futures fixtures.

        Sid 10000 follows the same four-session pattern as equity sid 1, but
        shifted one session later and with different prices.  Sid 10001 gets
        a constant bar on every minute of the first futures session plus
        every minute of sid 10000's index.
        """
        trading_calendar = cls.trading_calendars[Future]
        trading_sessions = cls.trading_sessions['us_futures']

        # No data on first day, future asset intentionally not on the same
        # dates as equities, so that cross-wiring of results do not create a
        # false positive.
        dts = trading_calendar.minutes_for_session(trading_sessions[1])
        dfs = [cls._no_trade_bars(dts)]

        # A single bar on the first minute of the session.
        dts = trading_calendar.minutes_for_session(trading_sessions[2])
        pad = full(len(dts) - 1, nan)
        dfs.append(pd.DataFrame(
            {
                'open': append(200.5, pad),
                'high': append(200.9, pad),
                'low': append(200.1, pad),
                'close': append(200.3, pad),
                # NOTE(review): volume is padded with nan here, whereas the
                # other sessions use 0 for minutes without trades -- confirm
                # this is intentional.
                'volume': append(2000, pad),
            },
            index=dts))

        # Only the first six minutes are written; minutes 1-4 carry trades.
        dts = trading_calendar.minutes_for_session(trading_sessions[3])
        dfs.append(pd.DataFrame(
            {
                'open': [nan, 203.50, 202.50, 204.50, 201.50, nan],
                'high': [nan, 203.90, 202.90, 204.90, 201.90, nan],
                'low': [nan, 203.10, 202.10, 204.10, 201.10, nan],
                'close': [nan, 203.30, 202.30, 204.30, 201.30, nan],
                'volume': [0, 2003, 2002, 2004, 2001, 0]
            },
            index=dts[:6]
        ))

        # No data again.
        dts = trading_calendar.minutes_for_session(trading_sessions[4])
        dfs.append(cls._no_trade_bars(dts))

        asset10000_df = pd.concat(dfs)
        yield 10000, asset10000_df

        # Sid 10001 also covers the first session, which sid 10000 lacks.
        # Prices carry five decimal places (see OHLC_RATIOS_PER_SID).
        missing_dts = trading_calendar.minutes_for_session(trading_sessions[0])
        asset10001_df = pd.DataFrame(
            {
                'open': 1.00549,
                'high': 1.00591,
                'low': 1.00507,
                'close': 1.0055,
                'volume': 100,
            },
            index=missing_dts.append(asset10000_df.index),
        )
        yield 10001, asset10001_df

    @classmethod
    def make_dividends_data(cls):
        """Return one 0.5 dividend for ``DIVIDEND_ASSET_SID``.

        All four date columns are set to the third trading day.
        """
        # only care about ex date, the other dates don't matter here
        dividend_dt = cls.trading_days[2].to_datetime64()
        return pd.DataFrame([
            {
                'ex_date': dividend_dt,
                'record_date': dividend_dt,
                'declared_date': dividend_dt,
                'pay_date': dividend_dt,
                'amount': 0.5,
                'sid': cls.DIVIDEND_ASSET_SID,
            }],
            columns=[
                'ex_date',
                'record_date',
                'declared_date',
                'pay_date',
                'amount',
                'sid'],
        )

    # get_last_traded_dt at minute frequency for equity sid 1.
    def test_get_last_traded_equity_minute(self):
        trading_calendar = self.trading_calendars[Equity]
        # Case: Missing data at front of data set, and request dt is before
        # first value.
        dts = trading_calendar.minutes_for_session(self.trading_days[0])
        asset = self.asset_finder.retrieve_asset(1)
        self.assertTrue(pd.isnull(
            self.data_portal.get_last_traded_dt(
                asset, dts[0], 'minute')))

        # Case: Data on requested dt.
        dts = trading_calendar.minutes_for_session(self.trading_days[2])

        self.assertEqual(dts[1],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[1], 'minute'))

        # Case: No data on dt, but data occurring before dt.
        self.assertEqual(dts[4],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[5], 'minute'))

    # get_last_traded_dt at minute frequency for future sid 10000, using the
    # futures calendar.
    def test_get_last_traded_future_minute(self):
        asset = self.asset_finder.retrieve_asset(10000)
        trading_calendar = self.trading_calendars[Future]
        # Case: Missing data at front of data set, and request dt is before
        # first value.
        dts = trading_calendar.minutes_for_session(self.trading_days[0])
        self.assertTrue(pd.isnull(
            self.data_portal.get_last_traded_dt(
                asset, dts[0], 'minute')))

        # Case: Data on requested dt.
        dts = trading_calendar.minutes_for_session(self.trading_days[3])

        self.assertEqual(dts[1],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[1], 'minute'))

        # Case: No data on dt, but data occurring before dt.
        self.assertEqual(dts[4],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[5], 'minute'))

    # get_last_traded_dt at daily frequency for equity sid 1.
    def test_get_last_traded_dt_equity_daily(self):
        # Case: Missing data at front of data set, and request dt is before
        # first value.
        asset = self.asset_finder.retrieve_asset(1)
        self.assertTrue(pd.isnull(
            self.data_portal.get_last_traded_dt(
                asset, self.trading_days[0], 'daily')))

        # Case: Data on requested dt.
        self.assertEqual(self.trading_days[1],
                         self.data_portal.get_last_traded_dt(
                             asset, self.trading_days[1], 'daily'))

        # Case: No data on dt, but data occurring before dt.
        self.assertEqual(self.trading_days[2],
                         self.data_portal.get_last_traded_dt(
                             asset, self.trading_days[3], 'daily'))

    def _assert_spot_values(self, asset, dt, expected):
        """Check minute spot values of ``asset`` at ``dt`` field by field.

        ``expected`` maps each field name to the value that
        ``get_spot_value`` should return for it; fields are queried in the
        mapping's iteration order and compared with ``assert_almost_equal``.
        """
        result = [
            self.data_portal.get_spot_value(asset, field, dt, 'minute')
            for field in expected
        ]
        assert_almost_equal(array(list(expected.values())), result)

    # Minute spot values for equity sid 1, on a traded and an untraded minute.
    def test_get_spot_value_equity_minute(self):
        trading_calendar = self.trading_calendars[Equity]
        asset = self.asset_finder.retrieve_asset(1)
        dts = trading_calendar.minutes_for_session(self.trading_days[2])

        # Case: Get data on exact dt.
        # Built from pairs so the field order is the declared one even where
        # plain dict literals are unordered.
        expected = OrderedDict([
            ('open', 103.5),
            ('high', 103.9),
            ('low', 103.1),
            ('close', 103.3),
            ('volume', 1003),
            ('price', 103.3),
        ])
        self._assert_spot_values(asset, dts[1], expected)

        # Case: Get data on empty dt, return nan or most recent data for price.
        expected = OrderedDict([
            ('open', nan),
            ('high', nan),
            ('low', nan),
            ('close', nan),
            ('volume', 0),
            ('price', 101.3),
        ])
        self._assert_spot_values(asset, dts[100], expected)

    # Minute spot values for future sid 10000, on a traded and an untraded
    # minute.
    def test_get_spot_value_future_minute(self):
        trading_calendar = self.trading_calendars[Future]
        asset = self.asset_finder.retrieve_asset(10000)
        dts = trading_calendar.minutes_for_session(self.trading_days[3])

        # Case: Get data on exact dt.
        # Built from pairs so the field order is the declared one even where
        # plain dict literals are unordered.
        expected = OrderedDict([
            ('open', 203.5),
            ('high', 203.9),
            ('low', 203.1),
            ('close', 203.3),
            ('volume', 2003),
            ('price', 203.3),
        ])
        self._assert_spot_values(asset, dts[1], expected)

        # Case: Get data on empty dt, return nan or most recent data for price.
        expected = OrderedDict([
            ('open', nan),
            ('high', nan),
            ('low', nan),
            ('close', nan),
            ('volume', 0),
            ('price', 201.3),
        ])
        self._assert_spot_values(asset, dts[100], expected)

    # Spot values requested for an equity and a future in a single call.
    def test_get_spot_value_multiple_assets(self):
        equity = self.asset_finder.retrieve_asset(1)
        future = self.asset_finder.retrieve_asset(10000)

        trading_calendar = self.trading_calendars[Future]
        dts = trading_calendar.minutes_for_session(self.trading_days[3])

        # We expect the outputs to be lists of spot values.
        expected = pd.DataFrame(
            {
                equity: [nan, nan, nan, nan, 0, 101.3],
                future: [203.5, 203.9, 203.1, 203.3, 2003, 203.3],
            },
            index=['open', 'high', 'low', 'close', 'volume', 'price'],
        )
        result = [
            self.data_portal.get_spot_value(
                assets=[equity, future],
                field=field,
                dt=dts[1],
                data_frequency='minute',
            )
            for field in expected.index
        ]
        assert_almost_equal(expected.values.tolist(), result)

    # Dividend adjustment ratio seen from sessions before, on and after the
    # ex date of the dividend declared in make_dividends_data.
    @parameter_space(data_frequency=['daily', 'minute'],
                     field=['close', 'price'])
    def test_get_adjustments(self, data_frequency, field):
        asset = self.asset_finder.retrieve_asset(self.DIVIDEND_ASSET_SID)
        calendar = self.trading_calendars[Equity]

        day = calendar.day
        dividend_date = self.trading_days[2]

        prev_day_price = 1.006
        dividend_amount = 0.5  # see self.make_dividends_data
        ratio = 1.0 - dividend_amount / prev_day_price

        # Maps (dt, perspective_dt) -> expected first adjustment value.
        cases = OrderedDict([
            ((dividend_date - day, dividend_date - day), 1.0),
            ((dividend_date - day, dividend_date), ratio),
            ((dividend_date - day, dividend_date + day), ratio),
            ((dividend_date, dividend_date), 1.0),
            ((dividend_date, dividend_date + day), 1.0),
            ((dividend_date + day, dividend_date + day), 1.0),
        ])

        for (dt, perspective_dt), expected in iteritems(cases):

            if data_frequency == 'minute':
                # Query with the session-open minutes instead of the
                # session labels.
                dt = calendar.session_open(dt)
                perspective_dt = calendar.session_open(perspective_dt)

            val = self.data_portal.get_adjustments(
                asset,
                field,
                dt,
                perspective_dt,
            )[0]
            assert_almost_equal(val, expected,
                                err_msg="at dt={} perspective={}"
                                .format(dt, perspective_dt))

    # Minute counts returned by _get_minute_count_for_transform for windows
    # that span a weekend and a holiday / early close.
    def test_bar_count_for_simple_transforms(self):
        #     July 2015
        # Su Mo Tu We Th Fr Sa
        #           1  2  3  4
        #  5  6  7  8  9 10 11
        # 12 13 14 15 16 17 18
        # 19 20 21 22 23 24 25
        # 26 27 28 29 30 31

        # half an hour into july 9, getting a 4-"day" window should get us
        # all the minutes of 7/6, 7/7, 7/8, and 31 minutes of 7/9

        july_9_dt = self.trading_calendar.open_and_close_for_session(
            pd.Timestamp("2015-07-09", tz='UTC')
        )[0] + Timedelta("30 minutes")

        self.assertEqual(
            (3 * 390) + 31,
            self.data_portal._get_minute_count_for_transform(july_9_dt, 4)
        )

        #    November 2015
        # Su Mo Tu We Th Fr Sa
        #  1  2  3  4  5  6  7
        #  8  9 10 11 12 13 14
        # 15 16 17 18 19 20 21
        # 22 23 24 25 26 27 28
        # 29 30

        # nov 26th closed
        # nov 27th was an early close

        # half an hour into nov 30, getting a 4-"day" window should get us
        # all the minutes of 11/24, 11/25, 11/27 (half day!), and 31 minutes
        # of 11/30
        nov_30_dt = self.trading_calendar.open_and_close_for_session(
            pd.Timestamp("2015-11-30", tz='UTC')
        )[0] + Timedelta("30 minutes")

        self.assertEqual(
            390 + 390 + 210 + 31,
            self.data_portal._get_minute_count_for_transform(nov_30_dt, 4)
        )

    # get_last_traded_dt at minute frequency for an equity and a future,
    # asked on a traded minute and on the untraded minute right after the
    # last trade.
    def test_get_last_traded_dt_minute(self):
        minutes = self.nyse_calendar.minutes_for_session(
            self.trading_days[2])
        equity = self.asset_finder.retrieve_asset(1)
        result = self.data_portal.get_last_traded_dt(equity,
                                                     minutes[3],
                                                     'minute')
        self.assertEqual(minutes[3], result,
                         "Asset 1 had a trade on third minute, so should "
                         "return that as the last trade on that dt.")

        result = self.data_portal.get_last_traded_dt(equity,
                                                     minutes[5],
                                                     'minute')
        self.assertEqual(minutes[4], result,
                         "Asset 1 had a trade on fourth minute, so should "
                         "return that as the last trade on the fifth.")

        future = self.asset_finder.retrieve_asset(10000)
        calendar = self.trading_calendars[Future]
        minutes = calendar.minutes_for_session(self.trading_days[3])
        result = self.data_portal.get_last_traded_dt(future,
                                                     minutes[3],
                                                     'minute')

        self.assertEqual(minutes[3], result,
                         "Asset 10000 had a trade on the third minute, so "
                         "return that as the last trade on that dt.")

        result = self.data_portal.get_last_traded_dt(future,
                                                     minutes[5],
                                                     'minute')
        self.assertEqual(minutes[4], result,
                         "Asset 10000 had a trade on fourth minute, so should "
                         "return that as the last trade on the fifth.")

    # Asking for splits of no assets yields an empty list.
    def test_get_empty_splits(self):
        splits = self.data_portal.get_splits([], self.trading_days[2])
        self.assertEqual([], splits)

    # One-bar history windows for equity sid 2, future sid 10001 and the
    # 'BUZ' continuous future must come back with rounded prices.
    @parameter_space(frequency=HISTORY_FREQUENCIES, field=OHLCV_FIELDS)
    def test_price_rounding(self, frequency, field):
        equity = self.asset_finder.retrieve_asset(2)
        future = self.asset_finder.retrieve_asset(10001)
        cf = self.data_portal.asset_finder.create_continuous_future(
            'BUZ', 0, 'calendar', None,
        )

        minutes = self.nyse_calendar.minutes_for_session(self.trading_days[0])

        if frequency == '1m':
            minute = minutes[0]
            expected_equity_volume = 100
            expected_future_volume = 100
            data_frequency = 'minute'
        else:
            # Any other frequency is queried on the session label, where the
            # volume is the per-minute volume summed over the whole session.
            minute = minutes[0].normalize()
            expected_equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
            expected_future_volume = 100 * FUTURES_MINUTES_PER_DAY
            data_frequency = 'daily'

        # Equity prices should be rounded (not floored) to three decimal
        # places: 1.0055 and 1.0059 both read back as 1.006.
        expected_equity_values = {
            'open': 1.006,
            'high': 1.006,
            'low': 1.005,
            'close': 1.006,
            'volume': expected_equity_volume,
        }

        # Futures prices should be rounded to four decimal places.
        expected_future_values = {
            'open': 1.0055,
            'high': 1.0059,
            'low': 1.0051,
            'close': 1.0055,
            'volume': expected_future_volume,
        }

        result = self.data_portal.get_history_window(
            assets=[equity, future, cf],
            end_dt=minute,
            bar_count=1,
            frequency=frequency,
            field=field,
            data_frequency=data_frequency,
        )
        expected_result = pd.DataFrame(
            {
                equity: expected_equity_values[field],
                future: expected_future_values[field],
                cf: expected_future_values[field],
            },
            index=[minute],
            dtype=float64_dtype,
        )

        assert_equal(result, expected_result)
class TestDataPortal(DataPortalTestBase, ZiplineTestCase):
    # Runs every DataPortalTestBase test with both "last available" settings
    # left unset (None).
    # NOTE(review): presumably these attributes are read by the data-portal
    # fixture when it builds self.data_portal -- confirm against the fixture.
    DATA_PORTAL_LAST_AVAILABLE_SESSION = None
    DATA_PORTAL_LAST_AVAILABLE_MINUTE = None
class TestDataPortalExplicitLastAvailable(DataPortalTestBase, ZiplineTestCase):
    # Runs every DataPortalTestBase test with the "last available" settings
    # given explicitly: the session aliases START_DATE and the minute aliases
    # END_DATE.
    # NOTE(review): presumably these attributes are read by the data-portal
    # fixture when it builds self.data_portal -- confirm against the fixture.
    DATA_PORTAL_LAST_AVAILABLE_SESSION = alias('START_DATE')
    DATA_PORTAL_LAST_AVAILABLE_MINUTE = alias('END_DATE')