Merge pull request #2309 from quantopian/float-precision-fix

BUG: Round values before truncating when writing bcolz minute/day tables.
This commit is contained in:
ernestoeperez88 2018-10-01 14:21:29 -07:00 committed by GitHub
commit 5cda2ad542
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 9 deletions

View file

@ -142,6 +142,38 @@ class BcolzMinuteBarTestCase(WithTradingCalendars,
self.assertEquals(50.0, volume_price)
def test_precision_after_scaling(self):
    """Check that a written bar reads back with exactly the same values.

    For numbers that don't have an exact float representation (130.23
    is used here), assert that scaling the value for storage does not
    cause a loss in precision.
    """
    minute = self.market_opens[self.test_calendar_start]
    sid = 1
    price = 130.23
    volume = 1000
    data = DataFrame(
        data={
            'open': [price],
            'high': [price],
            'low': [price],
            'close': [price],
            'volume': [volume],
        },
        index=[minute])
    self.writer.write_sid(sid, data)

    # Read every field back individually, in the same order the bar
    # was described, and require exact equality with what was written.
    expected_by_field = (
        ('open', price),
        ('high', price),
        ('low', price),
        ('close', price),
        ('volume', volume),
    )
    for field, expected in expected_by_field:
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            expected,
            self.reader.get_value(sid, minute, field),
        )
def test_write_one_ohlcv_with_ratios(self):
minute = self.market_opens[self.test_calendar_start]
sid = 1

View file

@ -406,7 +406,7 @@ class DataPortalTestBase(WithDataPortal,
day = calendar.day
dividend_date = self.trading_days[2]
prev_day_price = 1.005
prev_day_price = 1.006
dividend_amount = 0.5 # see self.make_dividends_data
ratio = 1.0 - dividend_amount / prev_day_price
@ -541,10 +541,10 @@ class DataPortalTestBase(WithDataPortal,
# Equity prices should be rounded to three decimal places.
expected_equity_values = {
'open': 1.005,
'high': 1.005,
'open': 1.006,
'high': 1.006,
'low': 1.005,
'close': 1.005,
'close': 1.006,
'volume': expected_equity_volume,
}
# Futures prices should be rounded to four decimal places.

View file

@ -365,7 +365,7 @@ class BcolzDailyBarWriter(object):
return raw_data
winsorise_uint32(raw_data, invalid_data_behavior, 'volume', *OHLC)
processed = (raw_data[list(OHLC)] * 1000).astype('uint32')
processed = (raw_data[list(OHLC)] * 1000).round().astype('uint32')
dates = raw_data.index.values.astype('datetime64[s]')
check_uint32_safe(dates.max().view(np.int64), 'day')
processed['day'] = dates.astype('uint32')

View file

@ -131,10 +131,10 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
If 'warn', logs a warning and filters out incompatible values.
If 'ignore', silently filters out incompatible values.
"""
scaled_opens = np.nan_to_num(cols['open']) * scale_factor
scaled_highs = np.nan_to_num(cols['high']) * scale_factor
scaled_lows = np.nan_to_num(cols['low']) * scale_factor
scaled_closes = np.nan_to_num(cols['close']) * scale_factor
scaled_opens = (np.nan_to_num(cols['open']) * scale_factor).round()
scaled_highs = (np.nan_to_num(cols['high']) * scale_factor).round()
scaled_lows = (np.nan_to_num(cols['low']) * scale_factor).round()
scaled_closes = (np.nan_to_num(cols['close']) * scale_factor).round()
exclude_mask = np.zeros_like(scaled_opens, dtype=bool)