mirror of
https://github.com/saymrwulf/zipline.git
synced 2026-05-15 21:01:32 +00:00
2540 lines
91 KiB
Python
2540 lines
91 KiB
Python
#
|
|
# Copyright 2015 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Tests for the zipline.assets package
|
|
"""
|
|
from collections import namedtuple
|
|
from datetime import timedelta
|
|
from functools import partial
|
|
import os
|
|
import pickle
|
|
import string
|
|
import sys
|
|
from types import GetSetDescriptorType
|
|
from unittest import TestCase
|
|
import uuid
|
|
import warnings
|
|
|
|
from nose_parameterized import parameterized
|
|
import numpy as np
|
|
from numpy import full, int32, int64
|
|
import pandas as pd
|
|
from six import viewkeys
|
|
import sqlalchemy as sa
|
|
|
|
from zipline.assets import (
|
|
Asset,
|
|
ExchangeInfo,
|
|
Equity,
|
|
Future,
|
|
AssetDBWriter,
|
|
AssetFinder,
|
|
)
|
|
from zipline.assets.assets import OwnershipPeriod
|
|
from zipline.assets.synthetic import (
|
|
make_commodity_future_info,
|
|
make_rotating_equity_info,
|
|
make_simple_equity_info,
|
|
)
|
|
from six import itervalues, integer_types
|
|
from toolz import valmap, concat
|
|
|
|
from zipline.assets.asset_writer import (
|
|
check_version_info,
|
|
write_version_info,
|
|
_futures_defaults,
|
|
SQLITE_MAX_VARIABLE_NUMBER,
|
|
)
|
|
from zipline.assets.asset_db_schema import ASSET_DB_VERSION
|
|
from zipline.assets.asset_db_migrations import (
|
|
downgrade
|
|
)
|
|
from zipline.errors import (
|
|
EquitiesNotFound,
|
|
FutureContractsNotFound,
|
|
MultipleSymbolsFound,
|
|
MultipleSymbolsFoundForFuzzySymbol,
|
|
MultipleValuesFoundForField,
|
|
MultipleValuesFoundForSid,
|
|
NoValueForSid,
|
|
AssetDBVersionError,
|
|
SameSymbolUsedAcrossCountries,
|
|
SidsNotFound,
|
|
SymbolNotFound,
|
|
AssetDBImpossibleDowngrade,
|
|
ValueNotFoundForField,
|
|
)
|
|
from zipline.testing import (
|
|
all_subindices,
|
|
empty_assets_db,
|
|
parameter_space,
|
|
powerset,
|
|
tmp_assets_db,
|
|
tmp_asset_finder,
|
|
)
|
|
from zipline.testing.predicates import assert_equal, assert_not_equal
|
|
from zipline.testing.fixtures import (
|
|
WithAssetFinder,
|
|
ZiplineTestCase,
|
|
WithTradingCalendars,
|
|
WithTmpDir,
|
|
WithInstanceTmpDir,
|
|
)
|
|
from zipline.utils.range import range
|
|
|
|
|
|
# One lookup_generic scenario: the finder to query, the inputs and optional
# as-of date / country code to pass, and the assets expected back.
Case = namedtuple(
    'Case',
    ['finder', 'inputs', 'as_of', 'country_code', 'expected'],
)

# One-minute offset used to probe just before and just after a boundary date.
minute = pd.Timedelta(minutes=1)
|
|
|
|
|
|
def build_lookup_generic_cases():
    """
    Generate the ``Case`` tuples consumed by ``test_lookup_generic``.

    A temporary assets database is filled with five equities, one futures
    contract (plus its root symbol) and three exchanges. Every case is
    yielded while that database is still open, so a caller that stops
    iterating early should ``close()`` the generator to release it.
    """
    utc = partial(pd.Timestamp, tz='UTC')

    unique_start = utc('2013-01-01')
    unique_end = utc('2014-01-01')

    dupe_old_start = utc('2013-01-01')
    dupe_old_end = utc('2013-01-02')
    dupe_new_start = utc('2013-01-03')
    dupe_new_end = utc('2013-01-03')

    def equity_record(sid, symbol, start, end, exchange):
        # Dates are passed to the writer as ``Timestamp.value`` integers.
        return {
            'sid': sid,
            'symbol': symbol,
            'start_date': start.value,
            'end_date': end.value,
            'exchange': exchange,
        }

    equity_frame = pd.DataFrame.from_records(
        [
            # Same ticker used twice within the US, with different
            # lifetimes.
            equity_record(
                0, 'duplicated_in_us',
                dupe_old_start, dupe_old_end, 'US_EXCHANGE',
            ),
            equity_record(
                1, 'duplicated_in_us',
                dupe_new_start, dupe_new_end, 'US_EXCHANGE',
            ),
            # A ticker used by exactly one asset.
            equity_record(
                2, 'unique',
                unique_start, unique_end, 'US_EXCHANGE',
            ),
            # Same ticker, same dates, but listed in different countries.
            equity_record(
                3, 'duplicated_globally',
                unique_start, unique_start, 'US_EXCHANGE',
            ),
            equity_record(
                4, 'duplicated_globally',
                unique_start, unique_start, 'CA_EXCHANGE',
            ),
        ],
        index='sid',
    )

    fof14_sid = 10000

    future_frame = pd.DataFrame.from_records(
        [
            {
                'sid': fof14_sid,
                'symbol': 'FOF14',
                'root_symbol': 'FO',
                'start_date': unique_start.value,
                'end_date': unique_end.value,
                'auto_close_date': unique_end.value,
                'exchange': 'US_FUT',
            },
        ],
        index='sid',
    )

    root_symbol_frame = pd.DataFrame({
        'root_symbol': ['FO'],
        'root_symbol_id': [1],
        'exchange': ['US_FUT'],
    })

    exchange_frame = pd.DataFrame.from_records([
        {'exchange': 'US_EXCHANGE', 'country_code': 'US'},
        {'exchange': 'CA_EXCHANGE', 'country_code': 'CA'},
        {'exchange': 'US_FUT', 'country_code': 'US'},
    ])

    with tmp_assets_db(
        equities=equity_frame,
        futures=future_frame,
        root_symbols=root_symbol_frame,
        exchanges=exchange_frame,
    ) as assets_db:
        finder = AssetFinder(assets_db)

        # Every case shares the same finder.
        case = partial(Case, finder)

        equity_assets = finder.retrieve_all(range(5))
        dupe_old, dupe_new, unique, dupe_us, dupe_ca = equity_assets

        fof14 = finder.retrieve_asset(fof14_sid)
        cf = finder.create_continuous_future(
            root_symbol=fof14.root_symbol,
            offset=0,
            roll_style='volume',
            adjustment=None,
        )

        all_assets = list(equity_assets) + [fof14, cf]

        for asset in all_assets:
            # An asset object resolves to itself ...
            yield case(asset, None, None, asset)
            # ... and so does its sid.
            yield case(asset.sid, None, None, asset)

        # Duplicated US equity symbol, disambiguated by the as-of date.
        for country in ('US', None):
            # Strictly before dupe_new_start the old holder is returned.
            yield case('DUPLICATED_IN_US', dupe_old_start, country, dupe_old)
            yield case(
                'DUPLICATED_IN_US',
                dupe_new_start - minute,
                country,
                dupe_old,
            )
            # From dupe_new_start onwards the new holder is returned.
            yield case('DUPLICATED_IN_US', dupe_new_start, country, dupe_new)
            yield case(
                'DUPLICATED_IN_US',
                dupe_new_start + minute,
                country,
                dupe_new,
            )

        # Globally duplicated symbol, disambiguated by country, with and
        # without an as-of date.
        for asset, country in ((dupe_us, 'US'), (dupe_ca, 'CA')):
            yield case('DUPLICATED_GLOBALLY', unique_start, country, asset)
            yield case('DUPLICATED_GLOBALLY', None, country, asset)

        # Future symbols should be unique; passing an as_of date as well
        # makes sure that code path is exercised.
        yield case('FOF14', None, None, fof14)
        yield case('FOF14', unique_start, None, fof14)

        ##
        # Iterables

        # Iterables of Asset objects.
        yield case(all_assets, None, None, all_assets)
        yield case(iter(all_assets), None, None, all_assets)

        # Iterables of ints.
        first_two = (0, 1)
        yield case(first_two, None, None, equity_assets[:2])
        yield case(iter(first_two), None, None, equity_assets[:2])

        # Iterables of symbols.
        yield case(
            ('DUPLICATED_IN_US', 'UNIQUE', 'DUPLICATED_GLOBALLY'),
            dupe_old_start,
            'US',
            [dupe_old, unique, dupe_us],
        )
        yield case(['DUPLICATED_GLOBALLY'], dupe_new_start, 'CA', [dupe_ca])

        # Mixed types.
        yield case(
            inputs=(
                'DUPLICATED_IN_US',     # dupe_old b/c of as_of
                dupe_new,               # dupe_new
                2,                      # unique
                'UNIQUE',               # unique
                'DUPLICATED_GLOBALLY',  # dupe_us b/c of country_code
                dupe_ca,                # dupe_ca
            ),
            as_of=dupe_old_start,
            country_code='US',
            expected=[dupe_old, dupe_new, unique, unique, dupe_us, dupe_ca],
        )

        # Futures and Equities.
        yield case(['FOF14', 0], None, None, [fof14, equity_assets[0]])
        yield case(
            ['FOF14', 'DUPLICATED_IN_US', 'DUPLICATED_GLOBALLY'],
            dupe_new_start,
            'US',
            [fof14, dupe_new, dupe_us],
        )

        # ContinuousFuture and Equity.
        yield case([cf, 0], None, None, [cf, equity_assets[0]])
        yield case(
            [cf, 'DUPLICATED_IN_US', 'DUPLICATED_GLOBALLY'],
            dupe_new_start,
            'US',
            [cf, dupe_new, dupe_us],
        )
|
|
|
|
|
|
class AssetTestCase(TestCase):
    """
    Tests for ``Asset`` objects: hashing, equality and ordering against
    other assets and plain integers, and dict / pickle round-trips.
    """

    # Dynamically list the Asset properties we want to test: every name on
    # the Asset class whose value is a getset descriptor.
    asset_attrs = [name for name, value in vars(Asset).items()
                   if isinstance(value, GetSetDescriptorType)]

    # Very wow
    asset = Asset(
        1337,
        symbol="DOGE",
        asset_name="DOGECOIN",
        start_date=pd.Timestamp('2013-12-08 9:31', tz='UTC'),
        end_date=pd.Timestamp('2014-06-25 11:21', tz='UTC'),
        first_traded=pd.Timestamp('2013-12-08 9:31', tz='UTC'),
        auto_close_date=pd.Timestamp('2014-06-26 11:21', tz='UTC'),
        exchange_info=ExchangeInfo('THE MOON', 'MOON', '??'),
    )

    # Assets with sids 3 < 4 < 5, shared by the rich-comparison tests below.
    test_exchange = ExchangeInfo('test full', 'test', '??')
    asset3 = Asset(3, exchange_info=test_exchange)
    asset4 = Asset(4, exchange_info=test_exchange)
    asset5 = Asset(
        5,
        exchange_info=ExchangeInfo('still testing', 'still testing', '??'),
    )

    def test_asset_object(self):
        """An Asset hashes, compares and converts like its integer sid."""
        the_asset = Asset(
            5061,
            exchange_info=ExchangeInfo('bar', 'bar', '??'),
        )

        # ``assertEqual`` is used throughout: the ``assertEquals`` alias is
        # deprecated and was removed from unittest in Python 3.12.
        self.assertEqual({5061: 'foo'}[the_asset], 'foo')
        self.assertEqual(the_asset, 5061)
        self.assertEqual(5061, the_asset)

        self.assertEqual(the_asset, the_asset)
        self.assertEqual(int(the_asset), 5061)

        self.assertEqual(str(the_asset), 'Asset(5061)')

    def test_to_and_from_dict(self):
        """``from_dict(to_dict())`` preserves every listed attribute."""
        asset_from_dict = Asset.from_dict(self.asset.to_dict())
        for attr in self.asset_attrs:
            self.assertEqual(
                getattr(self.asset, attr), getattr(asset_from_dict, attr),
            )

    def test_asset_is_pickleable(self):
        """A pickle round-trip preserves every listed attribute."""
        asset_unpickled = pickle.loads(pickle.dumps(self.asset))
        for attr in self.asset_attrs:
            self.assertEqual(
                getattr(self.asset, attr), getattr(asset_unpickled, attr),
            )

    def test_asset_comparisons(self):
        """Equality and ordering against assets, ints and other types."""
        s_23 = Asset(23, exchange_info=self.test_exchange)
        s_24 = Asset(24, exchange_info=self.test_exchange)

        self.assertEqual(s_23, s_23)
        self.assertEqual(s_23, 23)
        self.assertEqual(23, s_23)
        self.assertEqual(int32(23), s_23)
        self.assertEqual(int64(23), s_23)
        self.assertEqual(s_23, int32(23))
        self.assertEqual(s_23, int64(23))
        # Check all int types (includes long on py2):
        for int_type in integer_types:
            self.assertEqual(int_type(23), s_23)
            self.assertEqual(s_23, int_type(23))

        self.assertNotEqual(s_23, s_24)
        self.assertNotEqual(s_23, 24)
        self.assertNotEqual(s_23, "23")
        self.assertNotEqual(s_23, 23.5)
        self.assertNotEqual(s_23, [])
        self.assertNotEqual(s_23, None)
        # Compare to a value that doesn't fit into a platform int:
        self.assertNotEqual(s_23, sys.maxsize + 1)

        self.assertLess(s_23, s_24)
        self.assertLess(s_23, 24)
        self.assertGreater(24, s_23)
        self.assertGreater(s_24, s_23)

    def test_lt(self):
        """``<`` orders assets by sid."""
        self.assertTrue(self.asset3 < self.asset4)
        self.assertFalse(self.asset4 < self.asset4)
        self.assertFalse(self.asset5 < self.asset4)

    def test_le(self):
        """``<=`` orders assets by sid."""
        self.assertTrue(self.asset3 <= self.asset4)
        self.assertTrue(self.asset4 <= self.asset4)
        self.assertFalse(self.asset5 <= self.asset4)

    def test_eq(self):
        """``==`` holds only for the same sid."""
        self.assertFalse(self.asset3 == self.asset4)
        self.assertTrue(self.asset4 == self.asset4)
        self.assertFalse(self.asset5 == self.asset4)

    def test_ge(self):
        """``>=`` orders assets by sid."""
        self.assertFalse(self.asset3 >= self.asset4)
        self.assertTrue(self.asset4 >= self.asset4)
        self.assertTrue(self.asset5 >= self.asset4)

    def test_gt(self):
        """``>`` orders assets by sid."""
        self.assertFalse(self.asset3 > self.asset4)
        self.assertFalse(self.asset4 > self.asset4)
        self.assertTrue(self.asset5 > self.asset4)

    def test_type_mismatch(self):
        """
        Ordering an Asset against a string returns some value on Python 2
        and raises TypeError on Python 3.
        """
        if sys.version_info.major < 3:
            self.assertIsNotNone(self.asset3 < 'a')
            self.assertIsNotNone('a' < self.asset3)
        else:
            with self.assertRaises(TypeError):
                self.asset3 < 'a'
            with self.assertRaises(TypeError):
                'a' < self.asset3
|
|
|
|
|
|
class TestFuture(WithAssetFinder, ZiplineTestCase):
    """
    Tests for ``Future`` assets looked up through the class-level asset
    finder, which is populated from ``make_futures_info``.
    """

    @classmethod
    def make_futures_info(cls):
        """Return the futures metadata frame, indexed by sid."""
        return pd.DataFrame.from_dict(
            {
                2468: {
                    'symbol': 'OMH15',
                    'root_symbol': 'OM',
                    'notice_date': pd.Timestamp('2014-01-20', tz='UTC'),
                    'expiration_date': pd.Timestamp('2014-02-20', tz='UTC'),
                    'auto_close_date': pd.Timestamp('2014-01-18', tz='UTC'),
                    'tick_size': .01,
                    'multiplier': 500.0,
                    'exchange': "TEST",
                },
                0: {
                    'symbol': 'CLG06',
                    'root_symbol': 'CL',
                    'start_date': pd.Timestamp('2005-12-01', tz='UTC'),
                    'notice_date': pd.Timestamp('2005-12-20', tz='UTC'),
                    'expiration_date': pd.Timestamp('2006-01-20', tz='UTC'),
                    'multiplier': 1.0,
                    'exchange': 'TEST',
                },
            },
            orient='index',
        )

    @classmethod
    def init_class_fixtures(cls):
        """Look both contracts up once and keep them on the class."""
        super(TestFuture, cls).init_class_fixtures()
        cls.future = cls.asset_finder.lookup_future_symbol('OMH15')
        cls.future2 = cls.asset_finder.lookup_future_symbol('CLG06')

    def test_repr(self):
        """``repr`` shows the sid and the symbol."""
        reprd = repr(self.future)
        self.assertEqual("Future(2468 [OMH15])", reprd)

    def test_reduce(self):
        """A pickle round-trip yields the same ``to_dict`` output."""
        assert_equal(
            pickle.loads(pickle.dumps(self.future)).to_dict(),
            self.future.to_dict(),
        )

    def test_to_and_from_dict(self):
        """
        ``to_dict`` contains every ``_futures_defaults`` field, and
        ``from_dict`` rebuilds an equal ``Future``.
        """
        dictd = self.future.to_dict()
        for field in _futures_defaults.keys():
            # assertIn names the missing field on failure, unlike
            # assertTrue(field in dictd).
            self.assertIn(field, dictd)

        from_dict = Future.from_dict(dictd)
        self.assertIsInstance(from_dict, Future)
        self.assertEqual(self.future, from_dict)

    def test_root_symbol(self):
        """The root symbol written for OMH15 is exposed on the asset."""
        self.assertEqual('OM', self.future.root_symbol)

    def test_lookup_future_symbol(self):
        """
        Test the lookup_future_symbol method.
        """
        lookup = TestFuture.asset_finder.lookup_future_symbol

        om = lookup('OMH15')
        self.assertEqual(om.sid, 2468)
        self.assertEqual(om.symbol, 'OMH15')
        self.assertEqual(om.root_symbol, 'OM')
        self.assertEqual(om.notice_date, pd.Timestamp('2014-01-20', tz='UTC'))
        self.assertEqual(om.expiration_date,
                         pd.Timestamp('2014-02-20', tz='UTC'))
        self.assertEqual(om.auto_close_date,
                         pd.Timestamp('2014-01-18', tz='UTC'))

        cl = lookup('CLG06')
        self.assertEqual(cl.sid, 0)
        self.assertEqual(cl.symbol, 'CLG06')
        self.assertEqual(cl.root_symbol, 'CL')
        self.assertEqual(cl.start_date, pd.Timestamp('2005-12-01', tz='UTC'))
        self.assertEqual(cl.notice_date, pd.Timestamp('2005-12-20', tz='UTC'))
        self.assertEqual(cl.expiration_date,
                         pd.Timestamp('2006-01-20', tz='UTC'))

        # Empty, malformed and unknown symbols are all reported the same way.
        for unknown_symbol in ('', '#&?!', 'FOOBAR', 'XXX99'):
            with self.assertRaises(SymbolNotFound):
                lookup(unknown_symbol)
|
|
|
|
|
|
class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase):
|
|
asset_finder_type = AssetFinder
|
|
|
|
def write_assets(self, **kwargs):
    """Forward ``kwargs`` to ``write`` on this test's asset writer."""
    writer = self._asset_writer
    writer.write(**kwargs)
|
|
|
|
def init_instance_fixtures(self):
    """
    Give each test an empty assets database, together with a writer and a
    finder built on the same connection.
    """
    super(AssetFinderTestCase, self).init_instance_fixtures()

    # The database context is entered through the fixture machinery rather
    # than a ``with`` block -- presumably so it is exited at teardown.
    db_conn = self.enter_instance_context(empty_assets_db())
    self._asset_writer = AssetDBWriter(db_conn)
    finder_cls = self.asset_finder_type
    self.asset_finder = finder_cls(db_conn)
|
|
|
|
def test_blocked_lookup_symbol_query(self):
    """
    Request more sids than sqlite supports variables for in one query, to
    make sure the lookup is chunked on the client side.
    """
    as_of = pd.Timestamp('2013-01-01', tz='UTC')
    # We need more sids than we can query from sqlite in one go.
    sids = range(SQLITE_MAX_VARIABLE_NUMBER + 10)

    records = []
    for sid in sids:
        records.append({
            'sid': sid,
            'symbol': 'TEST.%d' % sid,
            'start_date': as_of.value,
            'end_date': as_of.value,
            'exchange': uuid.uuid4().hex
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))

    found = self.asset_finder.retrieve_equities(sids)
    assert_equal(viewkeys(found), set(sids))
|
|
|
|
def test_lookup_symbol_delimited(self):
    """
    Symbols written as ``TEST.<n>`` are found with any of the delimiters
    '-', '/', '_' and '.', but not without a delimiter or with '@'.
    """
    as_of = pd.Timestamp('2013-01-01', tz='UTC')

    records = []
    for sid in range(3):
        records.append({
            'sid': sid,
            'symbol': 'TEST.%d' % sid,
            'company_name': "company%d" % sid,
            'start_date': as_of.value,
            'end_date': as_of.value,
            'exchange': uuid.uuid4().hex
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))

    finder = self.asset_finder
    # All three assets are retrieved; only sid 1 is compared against below.
    _, asset_1, _ = (finder.retrieve_asset(sid) for sid in range(3))

    # we do it twice to catch caching bugs
    for _attempt in range(2):
        # 'TEST' and 'TEST1' carry no delimiter; '@' is not a supported one.
        for unresolvable in ('TEST', 'TEST1', 'TEST@1'):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(unresolvable, as_of)

        # Adding an unnecessary fuzzy shouldn't matter.
        for fuzzy_char in ['-', '/', '_', '.']:
            delimited = 'TEST%s1' % fuzzy_char
            self.assertEqual(asset_1, finder.lookup_symbol(delimited, as_of))
|
|
|
|
def test_lookup_symbol_fuzzy(self):
    """
    Look symbols up with and without an as-of date and with and without
    ``fuzzy=True``.

    'PRTYHRD' resolves to 'PRTY_HRD' only when fuzzy; exact symbols
    resolve to their own sid in every combination.
    """
    # Sids are expected to follow record order: 0, 1, 2.
    symbols = ('PRTY_HRD', 'BRKA', 'BRK_A')
    metadata = pd.DataFrame.from_records(
        [{'symbol': symbol, 'exchange': "TEST"} for symbol in symbols]
    )
    self.write_assets(equities=metadata)
    lookup = self.asset_finder.lookup_symbol
    dt = pd.Timestamp('2013-01-01', tz='UTC')
    as_of_dates = (None, dt)

    # Without fuzzy matching, PRTYHRD is unknown with or without a date.
    for as_of in as_of_dates:
        with self.assertRaises(SymbolNotFound):
            lookup('PRTYHRD', as_of)
    # With fuzzy matching it resolves to sid 0 either way.
    for as_of in as_of_dates:
        self.assertEqual(0, lookup('PRTYHRD', as_of, fuzzy=True))

    # Exact symbols: plain lookups first, then fuzzy, each with
    # as_of=None followed by as_of=dt.
    for sid, symbol in enumerate(symbols):
        for extra in ({}, {'fuzzy': True}):
            for as_of in as_of_dates:
                self.assertEqual(sid, lookup(symbol, as_of, **extra))
|
|
|
|
def test_lookup_symbol_change_ticker(self):
    """
    Two sids swap tickers over time: sid 0 holds 'A' then 'B', while
    sid 1 holds 'C' and then takes over 'A'. Lookups must follow the
    ownership in effect on the as-of date.
    """
    T = partial(pd.Timestamp, tz='utc')

    def holding(symbol, start, end):
        # One ownership period; the asset name always mirrors the symbol.
        return {
            'symbol': symbol,
            'asset_name': 'Asset %s' % symbol,
            'start_date': T(start),
            'end_date': T(end),
            'exchange': "TEST",
        }

    metadata = pd.DataFrame.from_records(
        [
            # sid 0
            holding('A', '2014-01-01', '2014-01-05'),
            holding('B', '2014-01-06', '2014-01-10'),
            # sid 1
            holding('C', '2014-01-01', '2014-01-05'),
            # claiming the unused symbol 'A'
            holding('A', '2014-01-06', '2014-01-10'),
        ],
        index=[0, 0, 1, 1],
    )
    self.write_assets(equities=metadata)
    finder = self.asset_finder

    def assert_held_by(symbol, asof, sid, last_symbol):
        # ``symbol`` must resolve to ``sid`` on ``asof``. The returned asset
        # always reports the last symbol / name that sid held.
        result = finder.lookup_symbol(symbol, asof)
        assert_equal(result, finder.retrieve_asset(sid), msg=str(asof))
        assert_equal(result.symbol, last_symbol)
        assert_equal(result.asset_name, 'Asset %s' % last_symbol)

    # note: these assertions walk forward in time, starting at assertions
    # about ownership before the start_date and ending with assertions
    # after the end_date; new assertions should be inserted in the correct
    # locations

    # no one held 'A' or 'C' before 01
    for unclaimed in ('A', 'C'):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol(unclaimed, T('2013-12-31'))

    for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
        # from 01 through 05 sid 0 held 'A' (it ends up as 'B')
        assert_held_by('A', asof, 0, 'B')
        # from 01 through 05 sid 1 held 'C' (it ends up as 'A')
        assert_held_by('C', asof, 1, 'A')

    # no one held 'B' before 06
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('B', T('2014-01-05'))

    # no one held 'C' after 06, however, no one has claimed it yet
    # so it still maps to sid 1
    assert_equal(
        finder.lookup_symbol('C', T('2014-01-07')),
        finder.retrieve_asset(1),
    )

    for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
        # from 06 through 10 sid 0 held 'B'; we test through the 11th
        # because sid 0 is the last to hold 'B', so it should ffill
        assert_held_by('B', asof, 0, 'B')
        # from 06 through 10 sid 1 held 'A'; we test through the 11th
        # because sid 1 is the last to hold 'A', so it should ffill
        assert_held_by('A', asof, 1, 'A')
|
|
|
|
def test_lookup_symbol(self):
    """
    Five assets share the symbol 'existing' over non-overlapping
    lifetimes; the as-of date must select the right one.
    """
    # Incrementing by two so that start and end dates for each
    # generated Asset don't overlap (each Asset's end_date is the
    # day after its start date.)
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    one_day = timedelta(days=1)

    records = []
    for sid, start in enumerate(dates):
        records.append({
            'sid': sid,
            'symbol': 'existing',
            'start_date': start.value,
            'end_date': (start + one_day).value,
            'exchange': 'NYSE',
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))

    finder = self.asset_finder
    # Run checks twice to test for caching bugs.
    for _attempt in range(2):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('NON_EXISTING', dates[0])

        # Without a date the shared symbol is ambiguous.
        with self.assertRaises(MultipleSymbolsFound):
            finder.lookup_symbol('EXISTING', None)

        # Verify that we correctly resolve multiple symbols using
        # the supplied date.
        for expected_sid, as_of in enumerate(dates):
            found = finder.lookup_symbol('EXISTING', as_of)
            self.assertEqual(found.symbol, 'EXISTING')
            self.assertEqual(found.sid, expected_sid)
|
|
|
|
def test_fail_to_write_overlapping_data(self):
    """
    Writing three assets that hold the same symbol over overlapping date
    ranges must raise ValueError with a message describing the overlap.
    """
    # (sid, start_date, end_date). Sid 2 matches sid 1 but ends later;
    # sid 3 matches sid 1 but starts later.
    lifetimes = [
        (1, '2010-01-01', '2012-01-01'),
        (2, '2010-01-01', '2013-01-01'),
        (3, '2011-01-01', '2012-01-01'),
    ]
    df = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': 'multiple',
                'start_date': pd.Timestamp(start),
                'end_date': pd.Timestamp(end),
                'exchange': 'NYSE'
            }
            for sid, start, end in lifetimes
        ]
    )

    with self.assertRaises(ValueError) as e:
        self.write_assets(equities=df)

    # NOTE(review): this message is whitespace-sensitive (it embeds an
    # aligned table). Confirm the spacing inside these literals against the
    # writer's actual output -- TODO verify.
    expected_error_msg = (
        "Ambiguous ownership for 1 symbol, multiple assets held the"
        " following symbols:\n"
        "MULTIPLE (??):\n"
        " intersections: (('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
        " ('2011-01-01 00:00:00', '2012-01-01 00:00:00'))\n"
        " start_date end_date\n"
        " sid \n"
        " 1 2010-01-01 2012-01-01\n"
        " 2 2010-01-01 2013-01-01\n"
        " 3 2011-01-01 2012-01-01"
    )
    actual_error_msg = str(e.exception)
    self.assertEqual(actual_error_msg, expected_error_msg)
|
|
|
|
def test_lookup_generic(self):
    """
    Ensure that lookup_generic works with various permutations of inputs.
    """
    cases = build_lookup_generic_cases()
    # Close the generator at teardown so its temp resources are cleaned up
    # even if a failure stops us from consuming the whole thing.
    self.add_instance_callback(cases.close)

    for case in cases:
        results, missing = case.finder.lookup_generic(
            case.inputs,
            case.as_of,
            case.country_code,
        )
        self.assertEqual(results, case.expected)
        self.assertEqual(missing, [])
|
|
|
|
def test_lookup_none_raises(self):
    """
    Passing ``None`` as the symbol to lookup_symbol must raise a
    TypeError.
    """
    as_of = pd.Timestamp('2013-01-01')
    finder = self.asset_finder

    with self.assertRaises(TypeError):
        finder.lookup_symbol(None, as_of)
|
|
|
|
def test_lookup_mult_are_one(self):
    """
    Ensure that multiple symbols that return the same sid are collapsed to
    a single returned asset.
    """
    date = pd.Timestamp('2013-01-01', tz='UTC')
    end = date + timedelta(days=30)

    # Sid 1 is written under two spellings over the same lifetime.
    records = []
    for symbol in ('FOOB', 'FOO_B'):
        records.append({
            'sid': 1,
            'symbol': symbol,
            'start_date': date.value,
            'end_date': end.value,
            'exchange': 'NYSE',
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))

    # If we are able to resolve this with any result, means that we did not
    # raise a MultipleSymbolError.
    found = self.asset_finder.lookup_symbol(
        'FOO/B',
        date + timedelta(1),
        fuzzy=True,
    )
    self.assertEqual(found.sid, 1)
|
|
|
|
def test_endless_multiple_resolves(self):
    """
    Situation:
    1. Asset 1 w/ symbol FOOB changes to FOO_B, and then is delisted.
    2. Asset 2 is listed with symbol FOO_B.

    If someone asks for FOO_B with fuzzy matching after 2 has been listed,
    they should be able to correctly get 2.
    """
    date = pd.Timestamp('2013-01-01', tz='UTC')

    def days_after(n):
        # Integer ``Timestamp.value`` of the day ``n`` days after ``date``.
        return (date + timedelta(days=n)).value

    # (sid, symbol, start_date, end_date)
    rows = [
        (1, 'FOOB', date.value, date.max.value),
        (1, 'FOO_B', days_after(31), days_after(60)),
        (2, 'FOO_B', days_after(61), date.max.value),
    ]
    df = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': start,
                'end_date': end,
                'exchange': 'NYSE',
            }
            for sid, symbol, start, end in rows
        ]
    )
    self.write_assets(equities=df)

    # If we are able to resolve this with any result, means that we did not
    # raise a MultipleSymbolError.
    as_of = date + timedelta(days=90)
    found = self.asset_finder.lookup_symbol('FOO/B', as_of, fuzzy=True)
    self.assertEqual(found.sid, 2)
|
|
|
|
def test_lookup_generic_handle_missing(self):
    """
    lookup_generic returns the inputs it can resolve and lists the ones it
    cannot, each in input order.
    """
    utc = partial(pd.Timestamp, tz='UTC')

    # (sid, symbol, start_date, end_date, exchange)
    rows = [
        (0, 'real', '2013-1-1', '2014-1-1', 'TEST'),
        (1, 'also_real', '2013-1-1', '2014-1-1', 'TEST'),
        # Sid whose end date is before our query date. We should
        # still correctly find it.
        (2, 'real_but_old', '2002-1-1', '2003-1-1', 'TEST'),
        # Sid whose start_date is **after** our query date. We should
        # **not** find it.
        (3, 'real_but_in_the_future', '2014-1-1', '2020-1-1', 'THE FUTURE'),
    ]
    data = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': utc(start),
                'end_date': utc(end),
                'exchange': exchange,
            }
            for sid, symbol, start, end, exchange in rows
        ]
    )
    self.write_assets(equities=data)

    results, missing = self.asset_finder.lookup_generic(
        ['REAL', 1, 'FAKE', 'REAL_BUT_OLD', 'REAL_BUT_IN_THE_FUTURE'],
        utc('2013-02-01'),
        country_code=None,
    )

    found = [('REAL', 0), ('ALSO_REAL', 1), ('REAL_BUT_OLD', 2)]
    self.assertEqual(len(results), 3)
    for position, (symbol, sid) in enumerate(found):
        self.assertEqual(results[position].symbol, symbol)
        self.assertEqual(results[position].sid, sid)

    self.assertEqual(len(missing), 2)
    self.assertEqual(missing[0], 'FAKE')
    self.assertEqual(missing[1], 'REAL_BUT_IN_THE_FUTURE')
|
|
|
|
def test_lookup_generic_multiple_symbols_across_countries(self):
    """
    A symbol listed in two countries is ambiguous without a country code
    and resolves to the matching asset once one is supplied.
    """
    listings = ((0, 'US_EXCHANGE'), (1, 'CA_EXCHANGE'))
    data = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': 'real',
                'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
                'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
                'exchange': exchange,
            }
            for sid, exchange in listings
        ]
    )
    exchanges = pd.DataFrame.from_records([
        {'exchange': 'US_EXCHANGE', 'country_code': 'US'},
        {'exchange': 'CA_EXCHANGE', 'country_code': 'CA'},
    ])

    self.write_assets(equities=data, exchanges=exchanges)
    finder = self.asset_finder

    # looking up a symbol shared by two assets across countries should
    # raise a SameSymbolUsedAcrossCountries if a country code is not
    # passed, with or without an as-of date
    for as_of in (pd.Timestamp('2014-1-1', tz='UTC'), None):
        with self.assertRaises(SameSymbolUsedAcrossCountries):
            finder.lookup_generic(
                'real',
                as_of_date=as_of,
                country_code=None,
            )

    # With a country code the lookup is unambiguous.
    for sid, country in ((0, 'US'), (1, 'CA')):
        matches, missing = finder.lookup_generic(
            'real',
            as_of_date=pd.Timestamp('2014-1-1', tz='UTC'),
            country_code=country,
        )
        self.assertEqual([matches], [finder.retrieve_asset(sid)])
        self.assertEqual(missing, [])
|
|
|
|
def test_security_dates_warning(self):
    """
    Reading ``security_start_date``, ``security_end_date`` and
    ``security_name`` from an Equity is expected to record exactly three
    warnings, each a ``DeprecationWarning``.
    """
    # Build an asset with an end_date
    exchange_info = ExchangeInfo("TEST", "TEST", "??")
    equity_asset = Equity(
        1,
        symbol="TESTEQ",
        end_date=pd.Timestamp('2012-01-01', tz='UTC'),
        exchange_info=exchange_info,
    )
    legacy_attributes = (
        'security_start_date',
        'security_end_date',
        'security_name',
    )

    # Record everything that is raised while touching the attributes
    with warnings.catch_warnings(record=True) as recorded:
        # Make sure no warning is suppressed by an existing filter
        warnings.simplefilter("always")
        for attribute in legacy_attributes:
            getattr(equity_asset, attribute)

        # One warning per attribute access, all of them deprecations
        self.assertEqual(3, len(recorded))
        for entry in recorded:
            self.assertTrue(
                issubclass(entry.category, DeprecationWarning)
            )
|
|
|
|
def test_compute_lifetimes(self):
    """
    Compare ``AssetFinder.lifetimes`` with a brute-force expectation.

    Four exchanges (two mapped to 'US', one to 'CA', one to 'JP') each
    list ``assets_per_exchange`` equities generated with identical
    arguments.  For every date index yielded by ``all_subindices`` and
    every group of country codes yielded by ``powerset``, the result of
    ``lifetimes`` is checked with ``include_start_date`` both True and
    False.
    """
    assets_per_exchange = 4
    trading_day = self.trading_calendar.day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')

    equities = pd.concat(
        [
            make_rotating_equity_info(
                num_assets=assets_per_exchange,
                first_start=first_start,
                frequency=trading_day,
                periods_between_starts=3,
                asset_lifetime=5,
                exchange=exchange,
            )
            for exchange in (
                'US_EXCHANGE_1',
                'US_EXCHANGE_2',
                'CA_EXCHANGE',
                'JP_EXCHANGE',
            )
        ],
        ignore_index=True,
    )
    # make every symbol unique
    equities['symbol'] = list(string.ascii_uppercase[:len(equities)])

    # shuffle up the sids so they are not contiguous per exchange
    sids = np.arange(len(equities))
    np.random.RandomState(1337).shuffle(sids)
    equities.index = sids
    # maps a sid back to its row position in ``equities``
    permute_sid = dict(zip(sids, range(len(sids)))).__getitem__

    exchanges = pd.DataFrame.from_records([
        {'exchange': 'US_EXCHANGE_1', 'country_code': 'US'},
        {'exchange': 'US_EXCHANGE_2', 'country_code': 'US'},
        {'exchange': 'CA_EXCHANGE', 'country_code': 'CA'},
        {'exchange': 'JP_EXCHANGE', 'country_code': 'JP'},
    ])
    sids_by_country = {
        'US': equities.index[:2 * assets_per_exchange],
        'CA': equities.index[
            2 * assets_per_exchange:3 * assets_per_exchange
        ],
        'JP': equities.index[3 * assets_per_exchange:],
    }
    self.write_assets(equities=equities, exchanges=exchanges)
    finder = self.asset_finder

    all_dates = pd.date_range(
        start=first_start,
        end=equities.end_date.max(),
        freq=trading_day,
    )

    # (start, end) pairs of the first exchange's assets.  These do not
    # depend on the dates under test, so they are extracted once; the
    # slice uses ``assets_per_exchange`` rather than a literal so the
    # expectation keeps the same width as the arrays filled below.
    # NOTE(review): tiling this template per exchange assumes every
    # exchange got the same schedule from make_rotating_equity_info.
    template_lifetimes = list(
        equities.iloc[:assets_per_exchange][
            ['start_date', 'end_date']
        ].itertuples(index=False)
    )

    for dates in all_subindices(all_dates):
        expected_with_start_raw = full(
            shape=(len(dates), assets_per_exchange),
            fill_value=False,
            dtype=bool,
        )
        expected_no_start_raw = full(
            shape=(len(dates), assets_per_exchange),
            fill_value=False,
            dtype=bool,
        )

        for i, date in enumerate(dates):
            for j, (start, end) in enumerate(template_lifetimes):
                # This way of doing the checks is redundant, but very
                # clear.
                if start <= date <= end:
                    expected_with_start_raw[i, j] = True
                    if start < date:
                        expected_no_start_raw[i, j] = True

        for country_codes in powerset(exchanges.country_code.unique()):
            expected_sids = pd.Int64Index(sorted(concat(
                sids_by_country[country_code]
                for country_code in country_codes
            )))
            # columns in row-position order, i.e. grouped per exchange
            permuted_sids = sorted(expected_sids, key=permute_sid)
            # 'US' owns two exchanges, so it contributes two tiles
            tile_count = len(country_codes) + ('US' in country_codes)

            for include_start_date, expected_raw in (
                (True, expected_with_start_raw),
                (False, expected_no_start_raw),
            ):
                expected = pd.DataFrame(
                    data=np.tile(expected_raw, tile_count),
                    index=dates,
                    columns=pd.Int64Index(permuted_sids),
                )
                result = finder.lifetimes(
                    dates,
                    include_start_date=include_start_date,
                    country_codes=country_codes,
                )
                assert_equal(result.columns, expected_sids)
                assert_equal(result[permuted_sids], expected)
|
|
|
|
def test_sids(self):
    """
    Ensure that the ``sids`` property of the AssetFinder reports exactly
    the sids that were written.
    """
    equity_info = make_simple_equity_info(
        [0, 1, 2],
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
    )
    self.write_assets(equities=equity_info)

    found_sids = set(self.asset_finder.sids)
    self.assertEqual({0, 1, 2}, found_sids)
|
|
|
|
def test_lookup_by_supplementary_field(self):
    """
    Resolve assets from their 'ALT_ID' supplementary value at several
    as-of dates, covering the not-found case and the ambiguous case where
    no as-of date is given.
    """
    utc = partial(pd.Timestamp, tz='UTC')

    equities = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': utc(start),
                'end_date': utc('2014-1-1'),
                'exchange': 'TEST',
            }
            for sid, symbol, start in (
                (0, 'A', '2013-1-1'),
                (1, 'B', '2013-1-1'),
                (2, 'C', '2013-7-1'),
            )
        ]
    )

    equity_supplementary_mappings = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'field': 'ALT_ID',
                'value': value,
                'start_date': utc(start),
                'end_date': utc(end),
            }
            for sid, value, start, end in (
                (0, '100000000', '2013-1-1', '2013-6-28'),
                (1, '100000001', '2013-1-1', '2014-1-1'),
                (0, '100000002', '2013-7-1', '2014-1-1'),
                (2, '100000000', '2013-7-1', '2014-1-1'),
            )
        ]
    )

    self.write_assets(
        equities=equities,
        equity_supplementary_mappings=equity_supplementary_mappings,
    )
    lookup = self.asset_finder.lookup_by_supplementary_field

    # Before sid 0 has changed ALT_ID.
    dt = utc('2013-6-28')
    for value, expected_sid in (('100000000', 0), ('100000001', 1)):
        self.assertEqual(lookup('ALT_ID', value, dt).sid, expected_sid)

    # We don't know about this ALT_ID yet.
    not_found_message = "Value '{}' was not found for field '{}'.".format(
        '100000002',
        'ALT_ID',
    )
    with self.assertRaisesRegex(ValueNotFoundForField, not_found_message):
        lookup('ALT_ID', '100000002', dt)

    # After all assets have ended.
    dt = utc('2014-01-02')
    for value, expected_sid in (
        ('100000000', 2),
        ('100000001', 1),
        ('100000002', 0),
    ):
        self.assertEqual(lookup('ALT_ID', value, dt).sid, expected_sid)

    # At this point both sids 0 and 2 have held this value, so an
    # as_of_date is required.
    expected_in_repr = (
        "Multiple occurrences of the value '{}' found for field '{}'."
    ).format('100000000', 'ALT_ID')

    with self.assertRaisesRegex(
        MultipleValuesFoundForField,
        expected_in_repr,
    ):
        lookup('ALT_ID', '100000000', None)
|
|
|
|
def test_get_supplementary_field(self):
    """
    Read the 'ALT_ID' supplementary value of individual sids at several
    as-of dates, covering a sid with no value yet and a sid that has held
    more than one value when no as-of date is given.
    """
    utc = partial(pd.Timestamp, tz='UTC')

    equities = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': utc(start),
                'end_date': utc('2014-1-1'),
                'exchange': 'TEST',
            }
            for sid, symbol, start in (
                (0, 'A', '2013-1-1'),
                (1, 'B', '2013-1-1'),
                (2, 'C', '2013-7-1'),
            )
        ]
    )

    equity_supplementary_mappings = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'field': 'ALT_ID',
                'value': value,
                'start_date': utc(start),
                'end_date': utc(end),
            }
            for sid, value, start, end in (
                (0, '100000000', '2013-1-1', '2013-6-28'),
                (1, '100000001', '2013-1-1', '2014-1-1'),
                (0, '100000002', '2013-7-1', '2014-1-1'),
                (2, '100000000', '2013-7-1', '2014-1-1'),
            )
        ]
    )

    self.write_assets(
        equities=equities,
        equity_supplementary_mappings=equity_supplementary_mappings,
    )
    finder = self.asset_finder

    # Before sid 0 has changed ALT_ID and sid 2 has started.
    dt = utc('2013-6-28')

    for sid, expected in [(0, '100000000'), (1, '100000001')]:
        self.assertEqual(
            finder.get_supplementary_field(sid, 'ALT_ID', dt),
            expected,
        )

    # Since sid 2 has not yet started, we don't know about its
    # ALT_ID.
    with self.assertRaisesRegex(
        NoValueForSid,
        "No '{}' value found for sid '{}'.".format('ALT_ID', 2),
    ):
        # plain call statement: no trailing comma, so no throwaway tuple
        finder.get_supplementary_field(2, 'ALT_ID', dt)

    # After all assets have ended.
    dt = utc('2014-01-02')

    for sid, expected in [
        (0, '100000002'), (1, '100000001'), (2, '100000000'),
    ]:
        self.assertEqual(
            finder.get_supplementary_field(sid, 'ALT_ID', dt),
            expected,
        )

    # Sid 0 has historically held two values for ALT_ID by this dt.
    with self.assertRaisesRegex(
        MultipleValuesFoundForSid,
        "Multiple '{}' values found for sid '{}'.".format('ALT_ID', 0),
    ):
        finder.get_supplementary_field(0, 'ALT_ID', None)
|
|
|
|
def test_group_by_type(self):
    """
    ``group_by_type`` should split a mixed list of sids into the sets of
    equity sids and future sids.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    futures = make_commodity_future_info(
        first_sid=6,
        root_symbols=['CL'],
        years=[2014],
    )
    self.write_assets(equities=equities, futures=futures)
    finder = self.asset_finder

    # Intersecting sid queries, to exercise loading of partially-cached
    # results.
    queries = [
        ([0, 1, 3], [6, 7]),
        ([0, 2, 3], [7, 10]),
        (list(equities.index), list(futures.index)),
    ]
    for equity_sids, future_sids in queries:
        expected = {
            'equity': set(equity_sids),
            'future': set(future_sids),
        }
        grouped = finder.group_by_type(equity_sids + future_sids)
        self.assertEqual(grouped, expected)
|
|
|
|
@parameterized.expand([
    (Equity, 'retrieve_equities', EquitiesNotFound),
    (Future, 'retrieve_futures_contracts', FutureContractsNotFound),
])
def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
    """
    The type-specific retrieval method named ``lookup_name`` should return
    a dict of ``type_`` instances for sids of that type and raise
    ``failure_type`` as soon as any requested sid is of the other type.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=['CL'],
        years=[2014],
    )
    self.write_assets(equities=equities, futures=futures)

    equity_sids = [0, 1]
    future_sids = [max_equity + 1, max_equity + 2, max_equity + 3]
    if type_ == Equity:
        success_sids, fail_sids = equity_sids, future_sids
    else:
        success_sids, fail_sids = future_sids, equity_sids

    lookup = getattr(self.asset_finder, lookup_name)

    # Run twice to exercise caching.
    for _ in range(2):
        results = lookup(success_sids)
        self.assertIsInstance(results, dict)
        self.assertEqual(set(results.keys()), set(success_sids))

        # every sid maps to an asset that converts back to that sid
        identity = dict(zip(success_sids, success_sids))
        self.assertEqual(valmap(int, results), identity)

        found_types = {type(asset) for asset in itervalues(results)}
        self.assertEqual({type_}, found_types)

        with self.assertRaises(failure_type):
            lookup(fail_sids)

        # Should fail if **any** of the assets are bad.
        mixed_sids = [success_sids[0], fail_sids[0]]
        with self.assertRaises(failure_type):
            lookup(mixed_sids)
|
|
|
|
def test_retrieve_all(self):
    """
    ``retrieve_all`` should return assets in the order requested, with the
    right type and symbol, for empty, equity-only, future-only and mixed
    queries, on both cold and warm caches.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=['CL'],
        years=[2014],
    )
    self.write_assets(equities=equities, futures=futures)
    finder = self.asset_finder

    all_sids = finder.sids
    self.assertEqual(len(all_sids), len(equities) + len(futures))

    mixed = tuple(equities.index[2:]) + tuple(futures.index[3:])
    queries = [
        # Empty Query.
        (),
        # Only Equities.
        tuple(equities.index[:2]),
        # Only Futures.
        tuple(futures.index[:3]),
        # Mixed, all cache misses.
        mixed,
        # Mixed, all cache hits.
        mixed,
        # Everything.
        all_sids,
        all_sids,
    ]
    for sids in queries:
        equity_sids = [sid for sid in sids if sid <= max_equity]
        future_sids = [sid for sid in sids if sid > max_equity]
        results = finder.retrieve_all(sids)

        # same sids, same order
        self.assertEqual(sids, tuple(map(int, results)))

        expected_types = (
            [Equity] * len(equity_sids) + [Future] * len(future_sids)
        )
        self.assertEqual(expected_types, [type(r) for r in results])

        expected_symbols = list(equities.symbol.loc[equity_sids])
        expected_symbols += list(futures.symbol.loc[future_sids])
        self.assertEqual(
            expected_symbols,
            [asset.symbol for asset in results],
        )
|
|
|
|
@parameterized.expand([
    (EquitiesNotFound, 'equity', 'equities'),
    (FutureContractsNotFound, 'future contract', 'future contracts'),
    (SidsNotFound, 'asset', 'assets'),
])
def test_error_message_plurality(self, error_type, singular, plural):
    """
    The not-found errors should word their message in the singular for a
    single sid and in the plural for several sids.
    """
    with self.assertRaises(error_type) as one_sid:
        raise error_type(sids=[1])
    self.assertEqual(
        str(one_sid.exception),
        "No {singular} found for sid: 1.".format(singular=singular)
    )

    with self.assertRaises(error_type) as two_sids:
        raise error_type(sids=[1, 2])
    self.assertEqual(
        str(two_sids.exception),
        "No {plural} found for sids: [1, 2].".format(plural=plural)
    )
|
|
|
|
|
|
class AssetFinderMultipleCountries(WithTradingCalendars, ZiplineTestCase):
|
|
def write_assets(self, **kwargs):
    """Pass ``kwargs`` straight through to this fixture's asset writer."""
    writer = self._asset_writer
    writer.write(**kwargs)
|
|
|
|
def init_instance_fixtures(self):
    """
    Create an empty assets db for the test instance and attach both a
    writer and a finder to it.
    """
    super(AssetFinderMultipleCountries, self).init_instance_fixtures()

    # the db context is entered on the instance so the fixture machinery
    # is responsible for leaving it
    assets_db = self.enter_instance_context(empty_assets_db())
    self._asset_writer = AssetDBWriter(assets_db)
    self.asset_finder = AssetFinder(assets_db)
|
|
|
|
@staticmethod
|
|
def country_code(n):
|
|
return 'A' + chr(ord('A') + n)
|
|
|
|
def test_lookup_symbol_delimited(self):
    """
    The ticker 'TEST.A' is listed once per country.  Lookups with any
    supported delimiter should be ambiguous without a country code and
    resolve to that country's asset with one; tickers without a supported
    delimiter should never resolve.
    """
    as_of = pd.Timestamp('2013-01-01', tz='UTC')
    num_assets = 3
    sids = list(range(num_assets))
    country_codes = [self.country_code(n) for n in range(num_assets)]

    frame = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': 'TEST.A',
                'company_name': "company %d" % sid,
                'start_date': as_of.value,
                'end_date': as_of.value,
                'exchange': 'EXCHANGE %d' % sid,
            }
            for sid in sids
        ]
    )
    exchanges = pd.DataFrame({
        'exchange': frame['exchange'],
        'country_code': country_codes,
    })
    self.write_assets(equities=frame, exchanges=exchanges)
    finder = self.asset_finder
    assets = finder.retrieve_all(sids)

    def shouldnt_resolve(ticker):
        # unresolvable globally ...
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol(ticker, as_of)
        # ... and within every single country
        for code in country_codes:
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(ticker, as_of, country_code=code)

    # we do it twice to catch caching bugs
    for _ in range(2):
        # '@' is not a supported delimiter
        for bad_ticker in ('TEST', 'TESTA', 'TEST@A'):
            shouldnt_resolve(bad_ticker)

        # Adding an unnecessary delimiter shouldn't matter.
        for delimiter in '-', '/', '_', '.':
            ticker = 'TEST%sA' % delimiter
            with self.assertRaises(SameSymbolUsedAcrossCountries):
                finder.lookup_symbol(ticker, as_of)

            for code, expected_asset in zip(country_codes, assets):
                actual_asset = finder.lookup_symbol(
                    ticker,
                    as_of,
                    country_code=code,
                )
                assert_equal(actual_asset, expected_asset)
                assert_equal(
                    actual_asset.exchange_info.country_code,
                    code,
                )
|
|
|
|
def test_lookup_symbol_fuzzy(self):
    """
    Exercise fuzzy and non-fuzzy symbol lookup when the same three tickers
    ('PRTY_HRD', 'BRKA', 'BRK_A') are listed in each of several countries.
    """
    num_countries = 3
    country_codes = [self.country_code(n) for n in range(num_countries)]
    tickers = ('PRTY_HRD', 'BRKA', 'BRK_A')

    metadata = pd.DataFrame.from_records([
        {'symbol': symbol, 'exchange': 'EXCHANGE %d' % n}
        for n in range(num_countries)
        for symbol in tickers
    ])
    exchanges = pd.DataFrame({
        'exchange': metadata['exchange'].unique(),
        'country_code': country_codes,
    })
    self.write_assets(equities=metadata, exchanges=exchanges)
    finder = self.asset_finder
    dt = pd.Timestamp('2013-01-01', tz='UTC')

    # Try combos of looking up PRTYHRD with and without a time or fuzzy
    # Both non-fuzzys get no result
    for as_of in (None, dt):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('PRTYHRD', as_of)

    for code in country_codes:
        # Given that this ticker isn't defined in any country, explicitly
        # passing a country code should still fail.
        for as_of in (None, dt):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(
                    'PRTYHRD',
                    as_of,
                    country_code=code,
                )

    # fuzzy lookups without a country match one asset per country
    for as_of in (None, dt):
        with self.assertRaises(MultipleSymbolsFoundForFuzzySymbol):
            finder.lookup_symbol('PRTYHRD', as_of, fuzzy=True)

    # if more than one asset is fuzzy matched within the same country,
    # raise an error
    with self.assertRaises(MultipleSymbolsFoundForFuzzySymbol):
        finder.lookup_symbol('BRK.A', None, country_code='AA', fuzzy=True)

    def check_sid(expected_sid, ticker, country_code):
        lookup_variants = (
            {'as_of_date': None},
            {'as_of_date': dt},
            {'as_of_date': None, 'fuzzy': True},
            {'as_of_date': dt, 'fuzzy': True},
        )
        for extra_params in lookup_variants:
            # the exception type for a country-less lookup depends on
            # whether the lookup was fuzzy
            expected_error = (
                MultipleSymbolsFoundForFuzzySymbol
                if 'fuzzy' in extra_params
                else SameSymbolUsedAcrossCountries
            )
            with self.assertRaises(expected_error):
                finder.lookup_symbol(ticker, **extra_params)

            resolved = finder.lookup_symbol(
                ticker,
                country_code=country_code,
                **extra_params
            )
            self.assertEqual(expected_sid, resolved)

    # sids were assigned in row order: three tickers per country
    for n, code in enumerate(country_codes):
        for offset, ticker in enumerate(tickers):
            check_sid(n * 3 + offset, ticker, code)
|
|
|
|
def test_lookup_symbol_change_ticker(self):
    """
    Check symbol resolution over time when assets change ticker.

    In each country the first sid holds 'A' and then 'B', while the second
    sid holds 'C' and then takes over 'A'.  Lookups are checked across all
    countries (expected to be ambiguous or not found) and within each
    country individually.
    """
    T = partial(pd.Timestamp, tz='utc')
    num_countries = 3
    metadata = pd.DataFrame.from_records(
        [
            # first sid per country
            {
                'symbol': 'A',
                'asset_name': 'Asset A',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'B',
                'asset_name': 'Asset B',
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },

            # second sid per country
            {
                'symbol': 'C',
                'asset_name': 'Asset C',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'A',  # claiming the unused symbol 'A'
                'asset_name': 'Asset A',
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },
        ] * num_countries,
        # two consecutive rows per sid, two sids per country
        index=np.repeat(np.arange(num_countries * 2), 2),
    )
    metadata['exchange'] = np.repeat(
        ['EXCHANGE %d' % n for n in range(num_countries)],
        4,
    )
    exchanges = pd.DataFrame({
        'exchange': ['EXCHANGE %d' % n for n in range(num_countries)],
        'country_code': [
            self.country_code(n) for n in range(num_countries)
        ]
    })
    self.write_assets(equities=metadata, exchanges=exchanges)
    finder = self.asset_finder

    def assert_doesnt_resolve(symbol, as_of_date):
        # check across all countries
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol(symbol, as_of_date)

        # check in each country individually
        for n in range(num_countries):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(
                    symbol,
                    as_of_date,
                    country_code=self.country_code(n),
                )

    def assert_resolves_in_each_country(symbol,
                                        as_of_date,
                                        sid_from_country_ix,
                                        expected_symbol,
                                        expected_name):
        # ensure this is ambiguous across all countries
        with self.assertRaises(SameSymbolUsedAcrossCountries):
            finder.lookup_symbol(symbol, as_of_date)

        for n in range(num_countries):
            result = finder.lookup_symbol(
                symbol,
                as_of_date,
                country_code=self.country_code(n),
            )
            assert_equal(
                result,
                finder.retrieve_asset(sid_from_country_ix(n)),
                # report the date actually being looked up, not whatever
                # the enclosing test's loop variable last held
                msg=str(as_of_date),
            )
            # The symbol and asset_name should always be the last held
            # values
            assert_equal(result.symbol, expected_symbol)
            assert_equal(result.asset_name, expected_name)

    # note: these assertions walk forward in time, starting at assertions
    # about ownership before the start_date and ending with assertions
    # after the end_date; new assertions should be inserted in the correct
    # locations

    # no one held 'A' before 01
    assert_doesnt_resolve('A', T('2013-12-31'))

    # no one held 'C' before 01
    assert_doesnt_resolve('C', T('2013-12-31'))

    for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
        # from 01 through 05 the first sid on the exchange held 'A'
        assert_resolves_in_each_country(
            'A',
            asof,
            sid_from_country_ix=lambda n: n * 2,
            expected_symbol='B',
            expected_name='Asset B',
        )

        # from 01 through 05 the second sid on the exchange held 'C'
        assert_resolves_in_each_country(
            'C',
            asof,
            sid_from_country_ix=lambda n: n * 2 + 1,
            expected_symbol='A',
            expected_name='Asset A',
        )

    # no one held 'B' before 06
    assert_doesnt_resolve('B', T('2014-01-05'))

    # no one held 'C' after 06, however, no one has claimed it yet
    # so it still maps to the second sid of each country
    assert_resolves_in_each_country(
        'C',
        T('2014-01-07'),
        sid_from_country_ix=lambda n: n * 2 + 1,
        expected_symbol='A',
        expected_name='Asset A',
    )

    for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
        # from 06 through 10 the first sid held 'B'
        # we test through the 11th because the first sid is the last to
        # hold 'B' so it should ffill
        assert_resolves_in_each_country(
            'B',
            asof,
            sid_from_country_ix=lambda n: n * 2,
            expected_symbol='B',
            expected_name='Asset B',
        )

        # from 06 through 10 the second sid held 'A'
        # we test through the 11th because the second sid is the last to
        # hold 'A' so it should ffill
        assert_resolves_in_each_country(
            'A',
            asof,
            sid_from_country_ix=lambda n: n * 2 + 1,
            expected_symbol='A',
            expected_name='Asset A',
        )
|
|
|
|
def test_lookup_symbol(self):
    """
    The ticker 'existing' is reused by a sequence of assets in every
    country.  Lookups must be ambiguous without a country code, ambiguous
    within a country without a date, and resolve to the right sid when
    both are given.
    """
    num_countries = 3
    country_codes = [self.country_code(n) for n in range(num_countries)]

    # Incrementing by two so that start and end dates for each
    # generated Asset don't overlap (each Asset's end_date is the
    # day after its start date.)
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    one_day = timedelta(days=1)

    records = []
    for n in range(num_countries):
        for i, date in enumerate(dates):
            records.append({
                'sid': n * len(dates) + i,
                'symbol': 'existing',
                'start_date': date.value,
                'end_date': (date + one_day).value,
                'exchange': 'EXCHANGE %d' % n,
            })
    df = pd.DataFrame.from_records(records)

    exchanges = pd.DataFrame({
        'exchange': ['EXCHANGE %d' % n for n in range(num_countries)],
        'country_code': country_codes,
    })
    self.write_assets(equities=df, exchanges=exchanges)
    finder = self.asset_finder

    for _ in range(2):  # Run checks twice to test for caching bugs.
        # an unknown ticker never resolves
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('NON_EXISTING', dates[0])
        for code in country_codes:
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(
                    'NON_EXISTING',
                    dates[0],
                    country_code=code,
                )

        # without a date: ambiguous across countries ...
        with self.assertRaises(SameSymbolUsedAcrossCountries):
            finder.lookup_symbol('EXISTING', None)

        # ... and ambiguous between the assets of a single country
        for code in country_codes:
            with self.assertRaises(MultipleSymbolsFound):
                finder.lookup_symbol('EXISTING', None, country_code=code)

        for i, date in enumerate(dates):
            # Verify that we correctly resolve multiple symbols using
            # the supplied date
            with self.assertRaises(SameSymbolUsedAcrossCountries):
                finder.lookup_symbol('EXISTING', date)

            for n, code in enumerate(country_codes):
                result = finder.lookup_symbol(
                    'EXISTING',
                    date,
                    country_code=code,
                )
                self.assertEqual(result.symbol, 'EXISTING')
                self.assertEqual(result.sid, n * len(dates) + i)
|
|
|
|
def test_fail_to_write_overlapping_data(self):
    """
    Writing equities whose ownership of one ticker overlaps in time within
    a country should raise ``ValueError`` with a message listing every
    conflicting group.
    """
    num_countries = 3
    exchange_names = ['EXCHANGE %d' % n for n in range(num_countries)]
    country_codes = [self.country_code(n) for n in range(num_countries)]

    # (sid offset, start, end) of the three conflicting assets listed in
    # each country: a baseline, the same with a later end date, and the
    # same with a later start_date
    overlapping = (
        (0, '2010-01-01', '2012-01-01'),
        (1, '2010-01-01', '2013-01-01'),
        (2, '2011-01-01', '2012-01-01'),
    )
    df = pd.DataFrame.from_records([
        {
            'sid': n * 3 + offset,
            'symbol': 'multiple',
            'start_date': pd.Timestamp(start),
            'end_date': pd.Timestamp(end),
            'exchange': exchange_names[n],
        }
        for n in range(num_countries)
        for offset, start, end in overlapping
    ])
    exchanges = pd.DataFrame({
        'exchange': exchange_names,
        'country_code': country_codes,
    })

    with self.assertRaises(ValueError) as e:
        self.write_assets(equities=df, exchanges=exchanges)

    # NOTE(review): the spacing inside these literals has to match the
    # writer's message character for character — confirm against the
    # message the writer actually produces before relying on this block.
    expected_error_msg = (
        "Ambiguous ownership for 3 symbols, multiple assets held the"
        " following symbols:\n"
        "MULTIPLE (%s):\n"
        " intersections: (('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
        " ('2011-01-01 00:00:00', '2012-01-01 00:00:00'))\n"
        " start_date end_date\n"
        " sid \n"
        " 0 2010-01-01 2012-01-01\n"
        " 1 2010-01-01 2013-01-01\n"
        " 2 2011-01-01 2012-01-01\n"
        "MULTIPLE (%s):\n"
        " intersections: (('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
        " ('2011-01-01 00:00:00', '2012-01-01 00:00:00'))\n"
        " start_date end_date\n"
        " sid \n"
        " 3 2010-01-01 2012-01-01\n"
        " 4 2010-01-01 2013-01-01\n"
        " 5 2011-01-01 2012-01-01\n"
        "MULTIPLE (%s):\n"
        " intersections: (('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
        " ('2011-01-01 00:00:00', '2012-01-01 00:00:00'))\n"
        " start_date end_date\n"
        " sid \n"
        " 6 2010-01-01 2012-01-01\n"
        " 7 2010-01-01 2013-01-01\n"
        " 8 2011-01-01 2012-01-01" % tuple(country_codes)
    )
    self.assertEqual(str(e.exception), expected_error_msg)
|
|
|
|
def test_endless_multiple_resolves(self):
    """
    Situation (repeated in every country):
    1. The first asset w/ symbol FOOB changes to FOO_B, and then is
       delisted.
    2. The second asset is listed with symbol FOO_B.

    If someone asks for FOO_B with fuzzy matching after the second asset
    has been listed, they should be able to correctly get it, provided a
    country code is given.
    """
    date = pd.Timestamp('2013-01-01', tz='UTC')
    lookup_date = date + timedelta(days=90)
    num_countries = 3
    exchange_names = ['EXCHANGE %d' % n for n in range(num_countries)]
    country_codes = [self.country_code(n) for n in range(num_countries)]

    # (sid offset within the country, symbol, start, end)
    listings = (
        (0, 'FOOB', date.value, date.max.value),
        (
            0,
            'FOO_B',
            (date + timedelta(days=31)).value,
            (date + timedelta(days=60)).value,
        ),
        (
            1,
            'FOO_B',
            (date + timedelta(days=61)).value,
            date.max.value,
        ),
    )
    df = pd.DataFrame.from_records([
        {
            'sid': n * 2 + offset,
            'symbol': symbol,
            'start_date': start,
            'end_date': end,
            'exchange': exchange_names[n],
        }
        for n in range(num_countries)
        for offset, symbol, start, end in listings
    ])
    exchanges = pd.DataFrame({
        'exchange': exchange_names,
        'country_code': country_codes,
    })
    self.write_assets(equities=df, exchanges=exchanges)
    finder = self.asset_finder

    # across countries the fuzzy ticker matches one asset per country
    with self.assertRaises(MultipleSymbolsFoundForFuzzySymbol):
        finder.lookup_symbol('FOO/B', lookup_date, fuzzy=True)

    # within a country it resolves to the second asset
    for n, code in enumerate(country_codes):
        result = finder.lookup_symbol(
            'FOO/B',
            lookup_date,
            fuzzy=True,
            country_code=code
        )
        self.assertEqual(result.sid, n * 2 + 1)
|
|
|
|
|
|
class TestAssetDBVersioning(ZiplineTestCase):
|
|
|
|
def init_instance_fixtures(self):
    """
    Create an empty assets db for the test instance and reflect its
    schema into ``self.metadata``.
    """
    super(TestAssetDBVersioning, self).init_instance_fixtures()
    engine = self.enter_instance_context(empty_assets_db())
    self.engine = engine
    self.metadata = sa.MetaData(engine, reflect=True)
|
|
|
|
def test_check_version(self):
    """
    ``check_version_info`` should accept the current db version and
    reject any other expected version.
    """
    engine = self.engine
    version_table = self.metadata.tables['version_info']

    # This should not raise an error
    check_version_info(engine, version_table, ASSET_DB_VERSION)

    # These should fail because the expected version is first too low
    # and then too high
    for wrong_version in (ASSET_DB_VERSION - 1, ASSET_DB_VERSION + 1):
        with self.assertRaises(AssetDBVersionError):
            check_version_info(engine, version_table, wrong_version)
|
|
|
|
def test_write_version(self):
    """
    ``write_version_info`` should store a version in an empty version
    table and refuse to overwrite one that is already there.
    """
    engine = self.engine
    version_table = self.metadata.tables['version_info']
    version_table.delete().execute()

    def stored_version():
        return sa.select((version_table.c.version,)).scalar()

    # Assert that the version is not present in the table
    self.assertIsNone(stored_version())

    # This should fail because the table has no version info and is,
    # therefore, considered v0
    with self.assertRaises(AssetDBVersionError):
        check_version_info(engine, version_table, -2)

    # This should not raise an error because the version has been written
    write_version_info(engine, version_table, -2)
    check_version_info(engine, version_table, -2)

    # Assert that the version is in the table and correct
    self.assertEqual(stored_version(), -2)

    # Assert that trying to overwrite the version fails
    with self.assertRaises(sa.exc.IntegrityError):
        write_version_info(engine, version_table, -3)
|
|
|
|
def test_finder_checks_version(self):
    """
    Constructing an ``AssetFinder`` should fail on a db with the wrong
    version and succeed once the version matches.
    """
    engine = self.engine
    version_table = self.metadata.tables['version_info']

    def stamp_version(version):
        # replace whatever version is stored and confirm it reads back
        version_table.delete().execute()
        write_version_info(engine, version_table, version)
        check_version_info(engine, version_table, version)

    stamp_version(-2)

    # Assert that trying to build a finder with a bad db raises an error
    with self.assertRaises(AssetDBVersionError):
        AssetFinder(engine=engine)

    # Change the version number of the db to the correct version
    stamp_version(ASSET_DB_VERSION)

    # Now that the versions match, this Finder should succeed
    AssetFinder(engine=engine)
|
|
|
|
def test_downgrade(self):
    # Downgrade a current assets db in two steps (to v3, then all the way
    # to v0) and check the reflected schema after each step.
    conn = self.engine.connect()
    # Release the connection even when an assertion below fails.
    try:
        # first downgrade to v3
        downgrade(self.engine, 3)
        metadata = sa.MetaData(conn)
        metadata.reflect()
        check_version_info(conn, metadata.tables['version_info'], 3)
        self.assertNotIn('exchange_full', metadata.tables)

        # now go all the way to v0
        downgrade(self.engine, 0)

        # Reflect again so that the checks below see the schema as it is
        # after the second downgrade, then verify the db version is 0.
        metadata = sa.MetaData(conn)
        metadata.reflect()
        version_table = metadata.tables['version_info']
        check_version_info(conn, version_table, 0)

        # Check some of the v1-to-v0 downgrades
        self.assertIn('futures_contracts', metadata.tables)
        self.assertIn('version_info', metadata.tables)
        futures_columns = metadata.tables['futures_contracts'].columns
        self.assertNotIn('tick_size', futures_columns)
        self.assertIn('contract_multiplier', futures_columns)
    finally:
        conn.close()
|
|
|
|
def test_impossible_downgrade(self):
    # Asking to "downgrade" a current assets db to a version higher than
    # the current one is expected to raise AssetDBImpossibleDowngrade.
    higher_than_current = ASSET_DB_VERSION + 5
    self.assertRaises(
        AssetDBImpossibleDowngrade,
        downgrade,
        self.engine,
        higher_than_current,
    )
|
|
|
|
def test_v5_to_v4_selects_most_recent_ticker(self):
    # Write equities in which sid 2 appears twice, downgrade the db to v4
    # and compare the rows left in the reflected equities table.
    ts = pd.Timestamp

    rows = [
        ['A', 'A', ts('2014-01-01'), ts('2014-01-02')],
        ['B', 'B', ts('2014-01-01'), ts('2014-01-02')],
        # the next two rows both belong to sid 2
        ['B', 'C', ts('2014-01-03'), ts('2014-01-04')],
        ['C', 'C', ts('2014-01-01'), ts('2014-01-02')],
    ]
    equities = pd.DataFrame(
        rows,
        index=[0, 1, 2, 2],
        columns=['symbol', 'asset_name', 'start_date', 'end_date'],
    )
    equities['exchange'] = 'NYSE'

    AssetDBWriter(self.engine).write(equities=equities)

    downgrade(self.engine, 4)
    metadata = sa.MetaData(self.engine)
    metadata.reflect()

    first_day = ts('2014-01-01').value
    second_day = ts('2014-01-02').value
    expected_data = {
        (0, 'A', 'A', first_day, second_day),
        (1, 'B', 'B', first_day, second_day),
        # sid 2 is expected to carry symbol 'B' with dates running from
        # 2014-01-01 to 2014-01-04
        (2, 'B', 'C', first_day, ts('2014-01-04').value),
    }
    actual_data = {
        (r.sid, r.symbol, r.asset_name, r.start_date, r.end_date)
        for r in sa.select(metadata.tables['equities'].c).execute()
    }

    assert_equal(expected_data, actual_data)
|
|
|
|
def test_v7_to_v6_only_keeps_US(self):
    # Write equities listed on a US exchange and on a JP exchange,
    # downgrade the db to v6 and check which sids are left.
    ts = pd.Timestamp

    equity_rows = [
        ['A', ts('2014-01-01'), ts('2014-01-02'), 'NYSE'],
        ['B', ts('2014-01-01'), ts('2014-01-02'), 'JPX'],
        ['C', ts('2014-01-03'), ts('2014-01-04'), 'NYSE'],
        ['D', ts('2014-01-01'), ts('2014-01-02'), 'JPX'],
    ]
    equities = pd.DataFrame(
        equity_rows,
        index=[0, 1, 2, 3],
        columns=['symbol', 'start_date', 'end_date', 'exchange'],
    )
    exchanges = pd.DataFrame.from_records([
        {'exchange': 'NYSE', 'country_code': 'US'},
        {'exchange': 'JPX', 'country_code': 'JP'},
    ])

    writer = AssetDBWriter(self.engine)
    writer.write(equities=equities, exchanges=exchanges)

    downgrade(self.engine, 6)
    metadata = sa.MetaData(self.engine)
    metadata.reflect()

    # Only the two NYSE-listed sids are expected to remain.
    expected_sids = {0, 2}
    equities_table = metadata.tables['equities']
    actual_sids = {
        row.sid for row in sa.select(equities_table.c).execute()
    }

    assert_equal(expected_sids, actual_sids)
|
|
|
|
|
|
class TestVectorizedSymbolLookup(WithAssetFinder, ZiplineTestCase):
    """Compare ``lookup_symbols`` with per-symbol ``lookup_symbol`` calls."""

    @classmethod
    def make_equity_info(cls):
        # Each entry is (sid, symbol, start_date, end_date). Symbols 'A'
        # and 'B' are each used by two sids over different date ranges.
        spec = [
            (1, 'A', '2014-01-02', '2014-01-31'),
            (2, 'A', '2014-02-03', '2015-01-02'),
            (3, 'B', '2014-01-02', '2014-01-15'),
            (4, 'B', '2014-01-17', '2015-01-02'),
            (5, 'C', '2001-01-02', '2015-01-02'),
            (6, 'D', '2001-01-02', '2015-01-02'),
            (7, 'FUZZY', '2001-01-02', '2015-01-02'),
        ]
        utc = partial(pd.Timestamp, tz='UTC')

        return pd.DataFrame.from_records([
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': utc(start),
                'end_date': utc(end),
                'exchange': 'NYSE',
            }
            for sid, symbol, start, end in spec
        ])

    @parameter_space(
        as_of=pd.to_datetime([
            '2014-01-02',
            '2014-01-15',
            '2014-01-17',
            '2015-01-02',
        ], utc=True),
        symbols=[
            [],
            ['A'], ['B'], ['C'], ['D'],
            list('ABCD'),
            list('ABCDDCBA'),
            list('AABBAABBACABD'),
        ],
    )
    def test_lookup_symbols(self, as_of, symbols):
        # The vectorized lookup must agree with looking up every symbol
        # on its own, including for repeated symbols.
        finder = self.asset_finder
        one_at_a_time = [
            finder.lookup_symbol(sym, as_of) for sym in symbols
        ]
        vectorized = finder.lookup_symbols(symbols, as_of)
        assert_equal(vectorized, one_at_a_time)

    def test_fuzzy(self):
        finder = self.asset_finder

        # FUZZ.Y shouldn't resolve unless fuzzy=True.
        syms = ['A', 'B', 'FUZZ.Y']
        dt = pd.Timestamp('2014-01-15', tz='UTC')

        # First with ``fuzzy`` left at its default, then with an explicit
        # fuzzy=False; both lookups are expected to fail.
        for lookup_kwargs in ({}, {'fuzzy': False}):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbols(syms, dt, **lookup_kwargs)

        fuzzy_matches = finder.lookup_symbols(syms, dt, fuzzy=True)
        assert_equal(fuzzy_matches, finder.retrieve_all([1, 3, 7]))
        assert_equal(
            fuzzy_matches,
            [finder.lookup_symbol(sym, dt, fuzzy=True) for sym in syms],
        )
|
|
|
|
|
|
class TestAssetFinderPreprocessors(WithTmpDir, ZiplineTestCase):
    """Check how ``AssetFinder`` treats a path that has no file behind it."""

    def test_asset_finder_doesnt_silently_create_useless_empty_files(self):
        missing_db = self.tmpdir.getpath(self.id() + '__nothing_here')
        message = "SQLite file {!r} doesn't exist.".format(missing_db)

        with self.assertRaises(ValueError) as caught:
            AssetFinder(missing_db)
        self.assertEqual(str(caught.exception), message)

        # sqlite3.connect will create an empty file if you connect
        # somewhere nonexistent. Test that we don't do that.
        self.assertFalse(os.path.exists(missing_db))
|
|
|
|
|
|
class TestExchangeInfo(ZiplineTestCase):
    """Tests for ``ExchangeInfo`` equality, repr and AssetFinder loading."""

    def test_equality(self):
        reference = ExchangeInfo('FULL NAME', 'E', 'US')
        twin = ExchangeInfo('FULL NAME', 'E', 'US')
        assert_equal(reference, twin)

        # Changing any single field must break equality.
        variants = [
            # same full name but different canonical name
            ExchangeInfo('FULL NAME', 'NOT E', 'US'),
            # same canonical name but different full name
            ExchangeInfo('DIFFERENT FULL NAME', 'E', 'US'),
            # same names but different country
            ExchangeInfo('FULL NAME', 'E', 'JP'),
        ]
        for variant in variants:
            assert_not_equal(variant, reference)

    def test_repr(self):
        info = ExchangeInfo('FULL NAME', 'E', 'US')
        assert_equal(repr(info), "ExchangeInfo('FULL NAME', 'E', 'US')")

    def test_read_from_asset_finder(self):
        # Two consecutive sids are listed on each of the four exchanges.
        full_names = (
            'NEW YORK STOCK EXCHANGE',
            'NASDAQ STOCK MARKET',
            'TOKYO STOCK EXCHANGE',
            'OSAKA STOCK EXCHANGE',
        )
        exchange_names = [name for name in full_names for _ in range(2)]
        sids = list(range(8))

        equities = pd.DataFrame({
            'sid': sids,
            'exchange': exchange_names,
            'symbol': [chr(65 + sid) for sid in sids],
        })

        exchange_infos = [
            ExchangeInfo('NEW YORK STOCK EXCHANGE', 'NYSE', 'US'),
            ExchangeInfo('NASDAQ STOCK MARKET', 'NYSE', 'US'),
            ExchangeInfo('TOKYO STOCK EXCHANGE', 'JPX', 'JP'),
            ExchangeInfo('OSAKA STOCK EXCHANGE', 'JPX', 'JP'),
        ]
        exchange_rows = [
            (info.name, info.canonical_name, info.country_code)
            for info in exchange_infos
        ]
        exchange_info_table = pd.DataFrame(
            exchange_rows,
            columns=['exchange', 'canonical_name', 'country_code'],
        )
        expected_by_name = {info.name: info for info in exchange_infos}

        finder_ctx = tmp_asset_finder(
            equities=equities,
            exchanges=exchange_info_table,
        )
        with finder_ctx as finder:
            actual_by_name = finder.exchange_info
            assets = finder.retrieve_all(sids)

        assert_equal(actual_by_name, expected_by_name)

        # Every asset must carry the ExchangeInfo of the exchange its sid
        # was written with.
        for asset in assets:
            written_exchange = exchange_names[asset.sid]
            assert_equal(
                asset.exchange_info,
                expected_by_name[written_exchange],
            )
|
|
|
|
|
|
class TestWrite(WithInstanceTmpDir, ZiplineTestCase):
|
|
def init_instance_fixtures(self):
    # Give every test its own assets.db path inside the per-instance
    # temporary directory, plus a writer pointed at that path.
    super(TestWrite, self).init_instance_fixtures()

    db_path = os.path.join(self.instance_tmpdir.path, 'assets.db')
    self.assets_db_path = db_path
    self.writer = AssetDBWriter(db_path)
|
|
|
|
def new_asset_finder(self):
    # Build a new AssetFinder over this test's assets.db path.
    finder = AssetFinder(self.assets_db_path)
    return finder
|
|
|
|
def test_write_multiple_exchanges(self):
    # Dates step by two days while every generated period ends one day
    # after it starts, so the periods of a sid never overlap.
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    sids = list(range(5))
    one_day = timedelta(days=1)

    records = []
    for n, date in enumerate(dates):
        for sid in sids:
            records.append({
                'sid': sid,
                'symbol': str(sid),
                'start_date': date.value,
                'end_date': (date + one_day).value,

                # Change the exchange with each mapping period. We don't
                # currently support point in time exchange information,
                # so we just take the most recent by end date.
                'exchange': 'EXCHANGE-%d-%d' % (sid, n),
            })
    self.writer.write(equities=pd.DataFrame.from_records(records))

    reader = self.new_asset_finder()

    # Every equity should report the exchange of its last period.
    last_period = len(dates) - 1
    for eq in reader.retrieve_all(reader.sids):
        expected_exchange = 'EXCHANGE-%d-%d' % (eq.sid, last_period)
        assert_equal(eq.exchange, expected_exchange)
|
|
|
|
def test_write_direct(self):
    # Write pre-built tables with write_direct, then read them back
    # through a new AssetFinder and compare against hand-built objects.
    ayy_qsip = str(hash('AYY'))
    lmao_qsip = str(hash('LMAO'))

    # don't include anything with a default to test that those work.
    equities = pd.DataFrame({
        'sid': [0, 1],
        'asset_name': ['Ayy Inc.', 'Lmao LP'],
        # the full exchange name
        'exchange': ['NYSE', 'TSE'],
    })
    equity_symbol_mappings = pd.DataFrame({
        'sid': [0, 1],
        'symbol': ['AYY', 'LMAO'],
        'company_symbol': ['AYY', 'LMAO'],
        'share_class_symbol': ['', ''],
    })
    equity_supplementary_mappings = pd.DataFrame({
        'sid': [0, 1],
        'field': ['QSIP', 'QSIP'],
        'value': [ayy_qsip, lmao_qsip],
    })
    exchanges = pd.DataFrame({
        'exchange': ['NYSE', 'TSE'],
        'country_code': ['US', 'JP'],
    })

    self.writer.write_direct(
        equities=equities,
        equity_symbol_mappings=equity_symbol_mappings,
        equity_supplementary_mappings=equity_supplementary_mappings,
        exchanges=exchanges,
    )

    reader = self.new_asset_finder()

    # Values the expectations below use for every column that was left
    # out of the written frames.
    epoch = pd.Timestamp(0, tz='UTC')
    end_of_time = pd.Timestamp.max.tz_localize('UTC')
    nyse = ExchangeInfo('NYSE', 'NYSE', 'US')
    tse = ExchangeInfo('TSE', 'TSE', 'JP')

    def expected_equity(sid, exchange_info, symbol, asset_name):
        # Equity with the given identity and the expected defaults.
        return Equity(
            sid,
            exchange_info,
            symbol=symbol,
            asset_name=asset_name,
            start_date=epoch,
            end_date=end_of_time,
            first_traded=None,
            auto_close_date=None,
            tick_size=0.01,
            multiplier=1.0,
        )

    def expected_periods(sid, value):
        # One ownership period per QSIP, wrapped in a 1-tuple.
        period = OwnershipPeriod(
            start=epoch,
            end=end_of_time,
            sid=sid,
            value=value,
        )
        return (period,)

    equities = reader.retrieve_all(reader.sids)
    expected_equities = [
        expected_equity(0, nyse, 'AYY', 'Ayy Inc.'),
        expected_equity(1, tse, 'LMAO', 'Lmao LP'),
    ]
    assert_equal(equities, expected_equities)

    exchange_info = reader.exchange_info
    expected_exchange_info = {'NYSE': nyse, 'TSE': tse}
    assert_equal(exchange_info, expected_exchange_info)

    supplementary_map = reader.equity_supplementary_map
    expected_supplementary_map = {
        ('QSIP', ayy_qsip): expected_periods(0, ayy_qsip),
        ('QSIP', lmao_qsip): expected_periods(1, lmao_qsip),
    }
    assert_equal(supplementary_map, expected_supplementary_map)
|