from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume, Returns, RSI, VWAP
from quantopian.pipeline.filters import Q500US, Q1500US
from quantopian.pipeline.data import morningstar
import pandas as pd
import numpy as np
from scipy import stats
import alphalens
def demean_by_group(signal, grouping):
    """Subtract each group's mean from the signal values in that group.

    Parameters
    ----------
    signal : np.ndarray
        1-D array of values for a single day.
    grouping : np.ndarray
        1-D array of group labels aligned element-wise with ``signal``.

    Returns
    -------
    np.ndarray
        1-D float array, same length as ``signal``, holding
        ``signal - mean(signal within the element's group)``. Elements whose
        label equals no label in ``grouping`` (e.g. a NaN label, which never
        compares equal to itself) are NaN.
    """
    demeaned = np.empty(signal.shape[0])
    # Start from NaN so entries that belong to no group are well defined
    # instead of holding whatever np.empty left in memory.
    demeaned[:] = np.nan
    for label in set(grouping):
        members = grouping == label
        member_count = members.sum()
        if member_count == 0:
            # Label matches nothing (NaN); there is no mean to subtract.
            continue
        # Divide by the number of members of this group, not by the total
        # number of elements, so the value subtracted is the group's mean.
        group_mean = signal[members].sum() / float(member_count)
        demeaned[members] = signal[members] - group_mean
    return demeaned
def make_factors():
    """Define the custom pipeline factors and return them by name.

    Returns
    -------
    dict
        Maps 'Alpha5', 'Alpha8' and 'Alpha9' to the corresponding
        ``CustomFactor`` subclass (the class itself, not an instance).

    Notes
    -----
    Inside each ``compute`` the input arrays are indexed by row for the day
    (row ``-1`` being the most recent one in the window) and yield one value
    per asset, matching ``out``.

    NOTE(review): the class names and expressions appear to follow the
    "101 Formulaic Alphas" numbering -- confirm against the paper.
    NOTE(review): ``stats.rankdata`` handling of NaN inputs depends on the
    scipy version; verify the inputs are NaN-free or mask them before ranking.
    """

    class Alpha5(CustomFactor):
        """rank(open - mean(vwap, 10)) * (-1 * abs(rank(close - vwap)))."""

        vwap_in = VWAP(window_length=2)
        # Allow the VWAP factor to be used as a windowed input of this factor.
        vwap_in.window_safe = True
        inputs = [USEquityPricing.close, USEquityPricing.open, vwap_in]
        window_length = 10

        def compute(self, today, assets, out, close, open_, vwap):
            mean_vwap = vwap[-10:].sum(axis=0) / 10.0
            open_vs_mean_vwap = stats.rankdata(open_[-1] - mean_vwap)
            close_vs_vwap = np.abs(stats.rankdata(close[-1] - vwap[-1]))
            out[:] = open_vs_mean_vwap * (-1.0 * close_vs_vwap)

    class Alpha8(CustomFactor):
        """-1 * rank(p - p delayed by 10 days), p = sum(open, 5) * sum(returns, 5)."""

        inputs = [Returns(window_length=2), USEquityPricing.open]
        # compute reads only the most recent 15 rows of the window.
        window_length = 16

        def compute(self, today, assets, out, returns, open_):
            current = open_[-5:].sum(axis=0) * returns[-5:].sum(axis=0)
            # Same product over the 5-day span ending 10 days earlier
            # (rows -15 .. -11).
            delayed = open_[-15:-10].sum(axis=0) * returns[-15:-10].sum(axis=0)
            out[:] = -1.0 * stats.rankdata(current - delayed)

    class Alpha9(CustomFactor):
        """Latest 1-day close change, sign-flipped unless the last 5 changes agree.

        Emits ``delta`` when the five most recent 1-day changes are all
        positive or all negative, and ``-delta`` otherwise, where ``delta``
        is the most recent 1-day change in close.
        """

        inputs = [USEquityPricing.close]
        # Five 1-day changes need six consecutive closes.
        window_length = 6

        def compute(self, today, assets, out, close):
            recent = close[-6:]
            # Rows are the five most recent 1-day changes; the last row is the
            # latest change, so the min/max window includes the value emitted.
            daily_change = recent[1:] - recent[:-1]
            latest_change = daily_change[-1]
            all_up = daily_change.min(axis=0) > 0.0
            all_down = daily_change.max(axis=0) < 0.0
            steady = all_up | all_down
            # NaN comparisons are False, so assets with missing data fall
            # through to the sign-flipped branch (and stay NaN if latest is NaN).
            out[:] = -1.0 * latest_change
            out[steady] = latest_change[steady]

    all_factors = {
        'Alpha5': Alpha5,
        'Alpha8': Alpha8,
        'Alpha9': Alpha9,
    }
    return all_factors
# Universe filter handed to make_history_pipeline together with the factors.
universe = Q500US()  # Q1500US() is the alternative; Q500US runs faster
# Name -> factor class mapping produced by make_factors().
factors = make_factors()
def make_history_pipeline(factors, universe):
    """Build a pipeline with one column per factor, limited to ``universe``.

    Parameters
    ----------
    factors : dict
        Maps column name to a factor class; each class is instantiated with
        ``mask=universe``.
    universe : pipeline filter
        Used both as the mask of every factor and as the pipeline screen.

    Returns
    -------
    Pipeline
        Pipeline whose columns are the instantiated factors.
    """
    # dict.items() works on Python 2 and 3; iteritems() exists only on Python 2.
    columns = {
        name: factor_cls(mask=universe)
        for name, factor_cls in factors.items()
    }
    return Pipeline(screen=universe, columns=columns)
# Assemble the pipeline from the factor classes and the universe defined above.
history_pipe = make_history_pipeline(factors, universe)
# Evaluation period passed to run_pipeline: 2016-01-01 through 2016-12-31.
start = pd.Timestamp("2016-01-01")
end = pd.Timestamp("2016-12-31")
results = run_pipeline(history_pipe, start_date=start, end_date=end)
# Label the two index levels of the result.
results.index.names = ['date', 'security']
# Preview the first rows (presumably for display in a notebook cell).
results.head()
# This is where I'm stuck. Any attempts at ranking and combining these 3 factors pre-pipe or
# post-pipe have been unsuccessful thus far.