Notebook
In [1]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume, Returns, RSI, VWAP
from quantopian.pipeline.filters import Q500US, Q1500US
from quantopian.pipeline.data import morningstar

import pandas as pd
import numpy as np
from scipy import stats
import alphalens
In [2]:
def demean_by_group(signal, grouping):
    """Subtract each group's mean from the signal values belonging to that group.

    Args:
        signal: 1-D array-like of numeric values for a single day.
        grouping: 1-D array-like of group labels, same length as ``signal``.

    Returns:
        1-D float np array, same length as ``signal``, holding
        ``signal - mean(signal within the same group)``.  Entries whose label
        matches no group (e.g. NaN labels, which never compare equal) are
        returned as NaN.
    """
    signal = np.asarray(signal, dtype=float)
    grouping = np.asarray(grouping)
    # Start from NaN rather than np.empty so that entries matching no group
    # are not left as uninitialised memory.
    demeaned = np.full(signal.shape[0], np.nan)
    for label in set(grouping):
        members = grouping == label
        if not members.any():
            # NaN labels end up here: nothing compares equal to NaN.
            continue
        # Use the mean over the members of this group only.  Dividing the
        # group sum by the length of the whole array (as before) understates
        # the mean whenever there is more than one group.
        demeaned[members] = signal[members] - signal[members].mean()

    return demeaned
In [3]:
def make_factors():
    """Define the custom pipeline factors and return them keyed by name.

    Returns:
        dict mapping 'Alpha5', 'Alpha8' and 'Alpha9' to the corresponding
        CustomFactor subclasses (the classes themselves, not instances).

    NOTE(review): the class names and formulas look like Alpha#5, #8 and #9
    of the "101 Formulaic Alphas" paper -- confirm against that source.
    The ``compute`` methods index their inputs as ``x[-k]`` (k-th most recent
    row) and size temporaries with ``out.shape[0]``; this assumes each input
    is a (window_length, len(out)) array -- TODO confirm against the
    CustomFactor contract.
    """

    class Alpha5(CustomFactor):
        """rank(open - mean(vwap over 10 rows)) * (-1 * abs(rank(close - vwap)))."""
        vwap_in = VWAP(window_length=2)
        # NOTE(review): presumably required so that a factor may be used as a
        # windowed input of another factor -- confirm against Pipeline docs.
        vwap_in.window_safe = True
        inputs = [USEquityPricing.close, USEquityPricing.open, vwap_in]
        window_length = 10

        # NOTE: the ``open`` parameter shadows the Python builtin inside compute.
        def compute(self, today, assets, out, close, open, vwap):
            # Most recent row of open prices.
            v000 = open[-1]
            # Copy the 10 most recent vwap rows, then average them.
            v00100 = np.empty((10, out.shape[0]))
            for i0 in range(1, 11):
                v00100[-i0] = vwap[-i0]
            v0010 = v00100.sum(axis=0)
            v0011 = np.full(out.shape[0], 10.0)
            v001 = v0010 / v0011
            # Latest open minus the 10-row vwap average, ranked across the array.
            v00 = v000 - v001
            v0 = stats.rankdata(v00)
            v10 = np.full(out.shape[0], -1.0)
            # Latest close minus latest vwap, ranked across the array.
            v11000 = close[-1]
            v11001 = vwap[-1]
            v1100 = v11000 - v11001
            v110 = stats.rankdata(v1100)
            # rankdata returns positive ranks, so abs() does not change them.
            v11 = np.abs(v110)
            v1 = v10 * v11
            out[:] = v0 * v1
            
    class Alpha8(CustomFactor):
        """-1 * rank(P_now - P_10_rows_ago), P = sum(open, 5 rows) * sum(returns, 5 rows)."""
        inputs = [Returns(window_length=2), USEquityPricing.open]
        # Only rows -1 .. -15 are read below; the 16th row is never used.
        window_length = 16

        # NOTE: the ``open`` parameter shadows the Python builtin inside compute.
        def compute(self, today, assets, out, returns, open):
            v0 = np.full(out.shape[0], -1.0)
            # Sum of the 5 most recent rows of open (rows -1 .. -5).
            v10000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10000[-i0] = open[-i0]
            v1000 = v10000.sum(axis=0)
            # Sum of the 5 most recent rows of returns (rows -1 .. -5).
            v10010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10010[-i0] = returns[-i0]
            v1001 = v10010.sum(axis=0)
            v100 = v1000 * v1001
            # Same two sums taken 10 rows earlier (rows -11 .. -15);
            # 10-i0 maps those rows onto indices -1 .. -5 of the temporary.
            v101000 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101000[10-i0] = open[-i0]
            v10100 = v101000.sum(axis=0)
            v101010 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101010[10-i0] = returns[-i0]
            v10101 = v101010.sum(axis=0)
            v1010 = v10100 * v10101
            v101 = v1010 # delay
            # Change in the product over 10 rows, ranked and negated.
            v10 = v100 - v101
            v1 = stats.rankdata(v10)
            out[:] = v0 * v1
            
    class Alpha9(CustomFactor):
        """Latest close change, sign-flipped unless recent changes were all up or all down.

        With d = close[-1] - close[-2]: output d where the minimum of the five
        windowed one-row changes is > 0, else d where their maximum is < 0,
        else -d.
        """
        inputs = [USEquityPricing.close]
        window_length = 7

        def compute(self, today, assets, out, close):
            v00 = np.full(out.shape[0], 0.0)
            # Five one-row close changes: close[-i0-1] - close[-i0-2] for
            # i0 = 1..5, i.e. close[-2]-close[-3] through close[-6]-close[-7].
            # NOTE(review): this window excludes the latest change
            # (close[-1] - close[-2]) that is used as the output value below;
            # if the window is meant to end at the latest change, the index
            # would be close[-i0-i1+1] -- confirm the intended lag.
            v010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v0100 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v0100[-i1] = close[-i0-i1]
                v010[-i0] = v0100[-1] - v0100[-2]
            v01 = np.min(v010, axis=0)
            # True where every change in the window is strictly positive.
            v0 = v00 < v01
            # Latest one-row change: close[-1] - close[-2].
            v10 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v10[-i0] = close[-i0]
            v1 = v10[-1] - v10[-2]
            # Same five windowed changes as above, recomputed for the max test.
            v2000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v20000 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v20000[-i1] = close[-i0-i1]
                v2000[-i0] = v20000[-1] - v20000[-2]
            v200 = np.max(v2000, axis=0)
            v201 = np.full(out.shape[0], 0.0)
            # True where every change in the window is strictly negative.
            v20 = v200 < v201
            # v21 and v221 recompute the same latest change as v1.
            v210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v210[-i0] = close[-i0]
            v21 = v210[-1] - v210[-2]
            v220 = np.full(out.shape[0], -1.0)
            v2210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v2210[-i0] = close[-i0]
            v221 = v2210[-1] - v2210[-2]
            v22 = v220 * v221
            # Inner conditional: latest change where all-negative, else its negation.
            v2lgcl = np.empty(out.shape[0])
            v2lgcl[v20] = v21[v20]
            v2lgcl[~v20] = v22[~v20]
            v2 = v2lgcl
            # Outer conditional: latest change where all-positive, else the inner result.
            vlgcl = np.empty(out.shape[0])
            vlgcl[v0] = v1[v0]
            vlgcl[~v0] = v2[~v0]
            out[:] = vlgcl

               
    # Factor classes keyed by the column name they get in the pipeline.
    all_factors = {
        'Alpha5' : Alpha5,
        'Alpha8' : Alpha8,
        'Alpha9' : Alpha9,
    }     
    return all_factors
In [4]:
# Universe filter for the study. Q1500US() is the alternative; Q500US is faster.
universe = Q500US() #Q1500US() q500 is faster
# Dict of factor name -> CustomFactor class, as returned by make_factors().
factors = make_factors()
In [13]:
def make_history_pipeline(factors, universe):
    """Build a Pipeline with one column per factor plus a combined rank column.

    Args:
        factors: dict mapping a column name to a factor class (or any callable
            accepting ``mask=``).  Must contain the keys 'Alpha5', 'Alpha8'
            and 'Alpha9', which are combined into the 'sum589' column.
        universe: filter used both as the mask of every factor and as the
            pipeline screen.

    Returns:
        Pipeline whose columns are the instantiated factors plus 'sum589',
        the sum of the ranks of Alpha5, Alpha8 and Alpha9.

    Raises:
        KeyError: if one of 'Alpha5', 'Alpha8', 'Alpha9' is missing from
            ``factors``.
    """
    # .items() works on both Python 2 and Python 3; .iteritems() exists on
    # Python 2 only.
    factor_funs = {name: factor(mask=universe) for name, factor in factors.items()}
    # create the sum of ranks
    factor_funs["sum589"] = (
        factor_funs['Alpha5'].rank()
        + factor_funs['Alpha8'].rank()
        + factor_funs['Alpha9'].rank()
    )
    return Pipeline(columns=factor_funs, screen=universe)

# Build the pipeline for the factor set and universe chosen above.
history_pipe = make_history_pipeline(factors, universe)
In [16]:
# Date range of the study; reused below when fetching prices.
start = pd.Timestamp("2016-01-01")
end = pd.Timestamp("2016-12-31")
# Run the pipeline over the range and keep the resulting frame of factor values.
results = run_pipeline(history_pipe, start_date=start, end_date=end)
# Preview the first rows only.
results.head()
Out[16]:
Alpha5 Alpha8 Alpha9 sum589
2016-01-04 00:00:00+00:00 Equity(2 [ARNC]) -94848.0 -274.0 0.10 426.0
Equity(24 [AAPL]) -1920.0 -435.0 2.09 1030.0
Equity(62 [ABT]) -72900.0 -225.0 0.35 622.0
Equity(67 [ADSK]) -35275.0 -395.0 1.33 866.0
Equity(76 [TAP]) -62073.0 -336.0 0.62 652.0
In [19]:
# Every asset in the second level of the results index.
assets = results.index.levels[1].unique()
# Open prices for those assets over the same date range as the factor values.
# NOTE(review): pricing stops at `end`, the last factor date, so forward
# returns for the final dates presumably cannot be computed -- confirm whether
# the pricing range should extend past `end` by the longest forward period.
pricing = get_pricing(assets, start_date=start, end_date=end, fields='open_price')
# Number of distinct assets fetched.
len(assets)
Out[19]:
572
In [20]:
# Now we're ready to use alphalens.
# NOTE: alphalens is already imported in the first cell, so this import is redundant.
import alphalens

# Ingest and format data: pair the combined 'sum589' factor with the price
# data, using 2 quantiles and forward periods of 3, 10 and 30.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(results['sum589'],  
                                                                   pricing,  
                                                                   quantiles=2,
                                                                   periods = [3,10,30])

# Run analysis: render the full tear sheet for the prepared factor data.
alphalens.tears.create_full_tear_sheet(factor_data)  

# Rule of thumb for the "IC Mean" row of the tear sheet: 0.1 is fantastic,
# 0.01 is pretty good, and anything below that is not good.
Quantiles Statistics
min max mean std count count %
factor_quantile
1 3.0 801.0 465.216578 192.523374 55195 50.087116
2 716.0 1500.0 1035.894697 196.422789 55003 49.912884
Returns Analysis
3 10 30
Ann. alpha 0.027 0.033 -0.002
beta 0.031 0.036 0.073
Mean Period Wise Return Top Quantile (bps) 2.377 13.080 15.588
Mean Period Wise Return Bottom Quantile (bps) -2.369 -13.035 -15.534
Mean Period Wise Spread (bps) 1.578 2.612 1.041
Information Analysis
3 10 30
IC Mean 0.001 0.015 0.016
IC Std. 0.131 0.139 0.123
t-stat(IC) 0.158 1.632 1.951
p-value(IC) 0.874 0.104 0.052
IC Skew 0.308 0.289 0.178
IC Kurtosis 0.350 0.108 -0.149
Ann. IR 0.169 1.739 2.078
Turnover Analysis
3 10 30
Quantile 1 Mean Turnover 0.398 0.570 0.504
Quantile 2 Mean Turnover 0.399 0.571 0.504
3 10 30
Mean Factor Rank Autocorrelation 0.287 -0.202 0.015
<matplotlib.figure.Figure at 0x7f34213b6c50>