zipline/tests/utils/daily_bar_writer.py
Scott Sanderson e491ebef53 ENH: Initial support for international pipelines.
This commit adds initial support for international markets in the Pipeline API
via addition of the concept of "domains".

Significant changes on this branch include:

- Added `zipline.pipeline.domain` and the `Domain` interface.
  - Added several canonical domain definitions:
    - GENERIC
    - US_EQUITIES
    - CA_EQUITIES
    - GB_EQUITIES

- Added `zipline.country` as a place to define canonical country code names.

- Made `PipelineLoader` an interface and added new `domain` parameter to
  signature of `load_adjusted_array`.

- Added a new system for "generic" and "specialized" DataSet and BoundColumn
  objects.

- Added a new system for inferring a domain from a pipeline that contains a mix
  of generic and non-generic terms.

- Reworked the built-in pricing dataset. USEquityPricing dataset is now a
  specialized version of a generic EquityPricing dataset, and most built-in
  factors are now implemented in terms of EquityPricing rather than
  USEquityPricing.

- Removed `zipline.data.us_equity_pricing`.
  - Moved BcolzDailyBar{Reader,Writer} to `zipline.data.bcolz_daily_bars`.
  - Moved SQLiteAdjustment{Reader,Writer} to `zipline.data.adjustments`.

New dependencies added as part of this work:

- iso3166 (for canonical country code definitions)
- python-interface (for strict interface definitions).

See https://github.com/quantopian/zipline/issues/2265 for a full description of
the design for domains.
2018-09-10 10:05:39 -05:00

50 lines
1.4 KiB
Python

from numpy import (
float64,
uint32,
int64,
)
from bcolz import ctable
from zipline.data.bcolz_daily_bars import (
BcolzDailyBarWriter,
OHLC,
UINT32_MAX
)
class DailyBarWriterFromDataFrames(BcolzDailyBarWriter):
_csv_dtypes = {
'open': float64,
'high': float64,
'low': float64,
'close': float64,
'volume': float64,
}
def __init__(self, asset_map):
self._asset_map = asset_map
def gen_tables(self, assets):
for asset in assets:
yield asset, ctable.fromdataframe(assets[asset])
def to_uint32(self, array, colname):
arrmax = array.max()
if colname in OHLC:
self.check_uint_safe(arrmax * 1000, colname)
return (array * 1000).astype(uint32)
elif colname == 'volume':
self.check_uint_safe(arrmax, colname)
return array.astype(uint32)
elif colname == 'day':
nanos_per_second = (1000 * 1000 * 1000)
self.check_uint_safe(arrmax.view(int64) / nanos_per_second,
colname)
return (array.view(int64) / nanos_per_second).astype(uint32)
@staticmethod
def check_uint_safe(value, colname):
if value >= UINT32_MAX:
raise ValueError(
"Value %s from column '%s' is too large" % (value, colname)
)