Notebook
In [3]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume, Returns, RSI, VWAP
from quantopian.pipeline.filters import Q500US, Q1500US
from quantopian.pipeline.data import morningstar

import pandas as pd
import numpy as np
from scipy import stats
import alphalens
In [4]:
def demean_by_group(signal, grouping):
    """Calculates and subtracts the group mean from the signal.  
    Both inputs are 1-day np arrays.  Returns 1-day np array of demeaned values."""
    values_to_return = np.empty(signal.shape[0])
    options = set(grouping)
    for option in options:
        logical = grouping == option 
        mean_by_group = signal[logical].sum()/logical.size
        values_to_return[logical] = signal[logical] - mean_by_group
                
    return values_to_return
In [5]:
def make_factors():

    class Alpha5(CustomFactor):
        vwap_in = VWAP(window_length=2)
        vwap_in.window_safe = True
        inputs = [USEquityPricing.close, USEquityPricing.open, vwap_in]
        window_length = 10

        def compute(self, today, assets, out, close, open, vwap):
            v000 = open[-1]
            v00100 = np.empty((10, out.shape[0]))
            for i0 in range(1, 11):
                v00100[-i0] = vwap[-i0]
            v0010 = v00100.sum(axis=0)
            v0011 = np.full(out.shape[0], 10.0)
            v001 = v0010 / v0011
            v00 = v000 - v001
            v0 = stats.rankdata(v00)
            v10 = np.full(out.shape[0], -1.0)
            v11000 = close[-1]
            v11001 = vwap[-1]
            v1100 = v11000 - v11001
            v110 = stats.rankdata(v1100)
            v11 = np.abs(v110)
            v1 = v10 * v11
            out[:] = v0 * v1
            
    class Alpha8(CustomFactor):
        inputs = [Returns(window_length=2), USEquityPricing.open]
        window_length = 16

        def compute(self, today, assets, out, returns, open):
            v0 = np.full(out.shape[0], -1.0)
            v10000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10000[-i0] = open[-i0]
            v1000 = v10000.sum(axis=0)
            v10010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10010[-i0] = returns[-i0]
            v1001 = v10010.sum(axis=0)
            v100 = v1000 * v1001
            v101000 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101000[10-i0] = open[-i0]
            v10100 = v101000.sum(axis=0)
            v101010 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101010[10-i0] = returns[-i0]
            v10101 = v101010.sum(axis=0)
            v1010 = v10100 * v10101
            v101 = v1010 # delay
            v10 = v100 - v101
            v1 = stats.rankdata(v10)
            out[:] = v0 * v1
            
    class Alpha9(CustomFactor):
        inputs = [USEquityPricing.close]
        window_length = 7

        def compute(self, today, assets, out, close):
            v00 = np.full(out.shape[0], 0.0)
            v010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v0100 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v0100[-i1] = close[-i0-i1]
                v010[-i0] = v0100[-1] - v0100[-2]
            v01 = np.min(v010, axis=0)
            v0 = v00 < v01
            v10 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v10[-i0] = close[-i0]
            v1 = v10[-1] - v10[-2]
            v2000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v20000 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v20000[-i1] = close[-i0-i1]
                v2000[-i0] = v20000[-1] - v20000[-2]
            v200 = np.max(v2000, axis=0)
            v201 = np.full(out.shape[0], 0.0)
            v20 = v200 < v201
            v210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v210[-i0] = close[-i0]
            v21 = v210[-1] - v210[-2]
            v220 = np.full(out.shape[0], -1.0)
            v2210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v2210[-i0] = close[-i0]
            v221 = v2210[-1] - v2210[-2]
            v22 = v220 * v221
            v2lgcl = np.empty(out.shape[0])
            v2lgcl[v20] = v21[v20]
            v2lgcl[~v20] = v22[~v20]
            v2 = v2lgcl
            vlgcl = np.empty(out.shape[0])
            vlgcl[v0] = v1[v0]
            vlgcl[~v0] = v2[~v0]
            out[:] = vlgcl

               
    all_factors = {
        'Alpha5' : Alpha5,
        'Alpha8' : Alpha8,
        'Alpha9' : Alpha9,
    }     
    return all_factors
In [6]:
universe = Q500US() #Q1500US() q500 is faster
factors = make_factors()
In [7]:
def make_history_pipeline(factors, universe):  
    factor_funs = {name: f(mask=universe) for name, f in factors.iteritems()}
    pipe = Pipeline(screen=universe, columns=factor_funs)
    return pipe

history_pipe = make_history_pipeline(factors, universe)
In [8]:
start = pd.Timestamp("2016-01-01")
end = pd.Timestamp("2016-12-31")
results = run_pipeline(history_pipe, start_date=start, end_date=end)
results.index.names = ['date', 'security']
results.head()
Out[8]:
Alpha5 Alpha8 Alpha9
date security
2016-01-04 00:00:00+00:00 Equity(2 [ARNC]) -94601.0 -275.0 0.10
Equity(24 [AAPL]) -1920.0 -434.0 2.09
Equity(62 [ABT]) -72630.0 -225.0 0.35
Equity(67 [ADSK]) -35518.0 -394.0 1.33
Equity(76 [TAP]) -61920.0 -336.0 0.62
In [10]:
#This is where I'm stuck. Any attempts at ranking and combining these 3 factors pre-pipe or 
#post-pipe have been unsuccesful thus far.
In [ ]: