Notebook

Alphalens - Open Source Factor Analysis

In [1]:
import numpy as np
import pandas as pd
import alphalens
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.factors import CustomFactor, Latest, Returns, RSI, EWMA as ewma, SimpleMovingAverage
from quantopian.pipeline.filters import Q1500US

Define the Factors

In [2]:
# Create the CustomFactor classes for the raw variables and factors

class LowVol(CustomFactor):
    """Low-volatility factor: negated standard deviation of returns.

    The input is ``Returns(window_length=2)`` observed over a 25-row window.
    The NaN-ignoring standard deviation along the time axis is negated, so
    assets with a smaller standard deviation receive a larger factor value.
    """
    window_length = 25
    inputs = [Returns(window_length=2)]

    def compute(self, today, assets, out, close):
        # NOTE: despite its name, ``close`` carries the Returns input above,
        # not closing prices.
        volatility = np.nanstd(close, axis=0)
        out[:] = -volatility

class growth(CustomFactor):
    """Pass-through of Morningstar ``operation_ratios.revenue_growth``."""
    inputs = [morningstar.operation_ratios.revenue_growth]
    window_length = 1

    def compute(self, today, assets, out, growth):
        # One-row window: the input is written to the output unchanged.
        out[:] = growth
        
class sus_growth(CustomFactor):
    """Pass-through of Morningstar ``valuation_ratios.sustainable_growth_rate``."""
    inputs = [morningstar.valuation_ratios.sustainable_growth_rate]
    window_length = 1

    def compute(self, today, assets, out, sustainable_growth_rate):
        # One-row window: the input is written to the output unchanged.
        out[:] = sustainable_growth_rate

#Quality
class turns(CustomFactor):
    """Pass-through of Morningstar ``operation_ratios.assets_turnover``."""
    inputs = [morningstar.operation_ratios.assets_turnover]
    window_length = 1

    def compute(self, today, assets, out, assets_turnover):
        # One-row window: the input is written to the output unchanged.
        out[:] = assets_turnover

class roic(CustomFactor):
    """Pass-through of Morningstar ``operation_ratios.roic``."""
    inputs = [morningstar.operation_ratios.roic]
    window_length = 1

    def compute(self, today, assets, out, roic):
        # One-row window: the input is written to the output unchanged.
        out[:] = roic

class margins(CustomFactor):
    """Pass-through of Morningstar ``operation_ratios.ebitda_margin``."""
    inputs = [morningstar.operation_ratios.ebitda_margin]
    window_length = 1

    def compute(self, today, assets, out, ebitda_margin):
        # One-row window: the input is written to the output unchanged.
        out[:] = ebitda_margin

#Valuation
class peg_ratio(CustomFactor):
    """Reciprocal of the Morningstar PEG ratio.

    Non-positive ratios are mapped to 0; every other value becomes
    ``1 / peg_ratio`` (so NaN inputs remain NaN).
    """
    inputs = [morningstar.valuation_ratios.peg_ratio]
    window_length = 1

    def compute(self, today, assets, out, peg_ratio):
        is_non_positive = peg_ratio <= 0
        inverse = 1 / peg_ratio
        out[:] = np.where(is_non_positive, 0, inverse)
        
class pe_ratio(CustomFactor):
    """Reciprocal of the Morningstar P/E ratio.

    Non-positive ratios are mapped to 0; every other value becomes
    ``1 / pe_ratio`` (so NaN inputs remain NaN).
    """
    inputs = [morningstar.valuation_ratios.pe_ratio]
    window_length = 1

    def compute(self, today, assets, out, pe_ratio):
        is_non_positive = pe_ratio <= 0
        inverse = 1 / pe_ratio
        out[:] = np.where(is_non_positive, 0, inverse)

class pb_ratio(CustomFactor):
    """Reciprocal of the Morningstar P/B ratio.

    Non-positive ratios are mapped to 0; every other value becomes
    ``1 / pb_ratio`` (so NaN inputs remain NaN).
    """
    inputs = [morningstar.valuation_ratios.pb_ratio]
    window_length = 1

    def compute(self, today, assets, out, pb_ratio):
        is_non_positive = pb_ratio <= 0
        inverse = 1 / pb_ratio
        out[:] = np.where(is_non_positive, 0, inverse)

class total_yield(CustomFactor):
    """Pass-through of Morningstar ``valuation_ratios.total_yield``."""
    inputs = [morningstar.valuation_ratios.total_yield]
    window_length = 1

    def compute(self, today, assets, out, total_yield):
        # One-row window: the input is written to the output unchanged.
        out[:] = total_yield
        
class ev_ebitda(CustomFactor):
    """Reciprocal of the Morningstar EV/EBITDA ratio.

    Non-positive ratios are mapped to 0; every other value becomes
    ``1 / ev_to_ebitda`` (so NaN inputs remain NaN).
    """
    inputs = [morningstar.valuation_ratios.ev_to_ebitda]
    window_length = 1

    def compute(self, today, assets, out, ev_to_ebitda):
        is_non_positive = ev_to_ebitda <= 0
        inverse = 1 / ev_to_ebitda
        out[:] = np.where(is_non_positive, 0, inverse)

class liquidity(CustomFactor):
    """Mean of ``volume * close`` over a 21-row window."""
    window_length = 21
    inputs = [USEquityPricing.volume, USEquityPricing.close]

    def compute(self, today, assets, out, volume, close):
        dollar_volume = volume * close
        # Average along the time axis, one value per asset column.
        out[:] = dollar_volume.mean(axis=0)
        
class sector(CustomFactor):
    """Most recent Morningstar sector code, exposed as a pipeline column."""
    window_length = 1
    inputs = [morningstar.asset_classification.morningstar_sector_code]

    def compute(self, today, assets, out, morningstar_sector_code):
        # The last row of the window is the most recent observation.
        latest_codes = morningstar_sector_code[-1]
        out[:] = latest_codes
        
class AvgDailyDollarVolumeTraded(CustomFactor):
    """Mean of ``close * volume`` over a 20-row window."""
    window_length = 20
    inputs = [USEquityPricing.close, USEquityPricing.volume]

    def compute(self, today, assets, out, close_price, volume):
        dollar_volume = close_price * volume
        # Average along the time axis, one value per asset column.
        out[:] = np.mean(dollar_volume, axis=0)

class PriceReturn(CustomFactor):
    """Ratio of the last close to the first close in a 2-row window.

    Note that this is a price ratio (last / first), not a percentage change.
    """
    window_length = 2
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        first_close = close[0]
        last_close = close[-1]
        out[:] = last_close / first_close

Set Universe

In [3]:
# Tradable universe filter; used below both as the factor mask and as the
# pipeline screen.
universe = Q1500US()

Create the Pipeline

In [ ]:
# Factor columns computed for the universe. Commented-out entries are
# available factors that are currently switched off; uncomment any you want
# to analyze. 'PE' must stay enabled: the multifactor comparison later in the
# notebook calls factor_engineering('PE') and fails with a KeyError without it.
pipe = Pipeline(
    columns={
        #'LowVol' : LowVol(mask=universe),
        #'ROIC': roic(mask=universe),
        'PE': pe_ratio(mask=universe),
        #'Growth': growth(mask=universe),
        #'SusGrowth': sus_growth(mask=universe),
        #'Turns': turns(mask=universe),
        #'Margins': margins(mask=universe),
        #'PEG': peg_ratio(mask=universe),
        #'PB': pb_ratio(mask=universe),
        #'TotalYield': total_yield(mask=universe),
        'EV_EBITDA': ev_ebitda(mask=universe),
        'PriceReturn': PriceReturn(mask=universe),
        # 'Sector' is the grouping key used by factor_engineering().
        'Sector': sector(mask=universe)
    },
    screen=universe
)

Run the Pipeline

In [ ]:
# Evaluate the pipeline over the sample period; the result is the frame that
# every later cell reads from.
results = run_pipeline(
    pipe,
    start_date='2014-01-01',
    end_date='2016-10-31'
)
In [ ]:
# Preview the raw pipeline output.
results.head()
In [ ]:
#Develop Factors for Testing

def factor_engineering(factor_name, data=None, max_abs_zscore=3):
    """Return a per-(date, sector) z-score of a factor with outliers removed.

    Every step is carried out within each (index level 0, 'Sector') group:

    1. Missing factor values are filled with the group median.
    2. The filled factor is z-scored.
    3. Values whose absolute z-score exceeds ``max_abs_zscore`` are blanked
       out and refilled with the group median of the remaining values.
    4. The adjusted factor is z-scored again.

    The intermediate columns ``factor``, ``factor_score``, ``factor_adj`` and
    ``factor_score_adj`` are written onto the frame, overwriting the columns
    left behind by any previous call.

    Parameters
    ----------
    factor_name : str
        Name of the column holding the raw factor values.
    data : pandas.DataFrame, optional
        Frame to work on; it needs a MultiIndex whose first level is the
        grouping date and a 'Sector' column. Defaults to the notebook-level
        ``results`` frame.
    max_abs_zscore : float, optional
        Absolute z-score above which a value is treated as an outlier.
        Defaults to 3.

    Returns
    -------
    pandas.Series
        The ``factor_score_adj`` column, indexed like the frame.
    """
    frame = results if data is None else data
    group_keys = [frame.index.get_level_values(0), frame['Sector']]

    def fill_with_group_median(series):
        # transform (unlike apply) always returns a result indexed exactly
        # like its input, so it can be assigned straight back to the frame.
        return series.groupby(group_keys).transform(
            lambda grp: grp.fillna(grp.median()))

    def group_zscore(series):
        return series.groupby(group_keys).transform(
            lambda grp: (grp - grp.mean()) / grp.std())

    frame['factor'] = fill_with_group_median(frame[factor_name])
    frame['factor_score'] = group_zscore(frame['factor'])

    # Blank out outliers, then refill them with the group median (not mean).
    is_outlier = frame['factor_score'].abs() > max_abs_zscore
    frame['factor_adj'] = fill_with_group_median(frame['factor'].mask(is_outlier))
    frame['factor_score_adj'] = group_zscore(frame['factor_adj'])
    return frame['factor_score_adj']

# Look-ahead "factor": for each asset (index level 1), take the PriceReturn
# from the next row. This leaks future information on purpose and is only
# meant to show what a very strong factor looks like in the tear sheet.
# GroupBy.shift keeps the original index, so the result aligns with `results`.
results['forward_return_1'] = results.groupby(level=1)['PriceReturn'].shift(-1)

Get pricing

In [ ]:
# Every asset present in the second index level of the pipeline output.
assets = results.index.levels[1].unique()
# Prices must run PAST the last pipeline date (2016-10-31): forward returns of
# up to 10 periods are computed from them, and factor rows without a forward
# price are dropped from the analysis.
pricing = get_pricing(assets, start_date='2012-06-30', end_date='2016-11-30', fields='open_price')
In [ ]:
# Preview the frame again, now including the forward_return_1 column.
results.head()

Build Tearsheet

Alphalens builds a tear sheet for a single factor at a time.

In [ ]:
# Baseline: the EV_EBITDA column exactly as the pipeline produced it,
# bucketed into quintiles with 1-, 5- and 10-period forward returns.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['EV_EBITDA'],
    pricing,
    quantiles=5,
    periods=(1, 5, 10))

alphalens.tears.create_full_tear_sheet(factor_data)
In [ ]:
# The factor can be improved a little simply by removing outliers.
# Here we feed Alphalens the z-score-adjusted factor built by
# factor_engineering() instead of the raw column. The same pattern can be
# used to iterate quickly on other factors.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor_engineering('EV_EBITDA'),
    pricing,
    quantiles=5,
    periods=(1, 5, 10))

alphalens.tears.create_full_tear_sheet(factor_data)
In [ ]:
# What do "good" charts look like?
# Use the look-ahead (forward) return as the factor: the 1-period charts
# should come out looking really good -- too good, since the factor knows
# the future. Some outputs (e.g. the distribution charts) are not really
# useful here and turnover is very high, but this shows what the charts
# could look like.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['forward_return_1'],
    pricing,
    quantiles=5,
    periods=(1, 5, 10))

alphalens.tears.create_full_tear_sheet(factor_data)
In [ ]:
# Several factor datasets can be built side by side for multifactor analysis.
# The Alphalens source (https://github.com/quantopian/alphalens) exposes
# return streams that can be compared across them.
# This cell reads results['PE'], so the 'PE' column must be present in the
# pipeline output.
factor_data_2 = alphalens.utils.get_clean_factor_and_forward_returns(
    factor_engineering('PE'),
    pricing,
    quantiles=5,
    periods=(1, 5, 10))
In [ ]:
# Factor return streams for the two datasets.
# NOTE(review): when the notebook is run top to bottom, `factor_data` was last
# assigned from the look-ahead factor results['forward_return_1'], not from
# EV_EBITDA -- confirm this is the dataset intended for the comparison.
factor_returns_1 = alphalens.performance.factor_returns(factor_data)#[['1']]
factor_returns_2 = alphalens.performance.factor_returns(factor_data_2)#[['1']]
In [ ]:
# Put the two return streams side by side, aligned on their shared index.
# Both frames carry the same column names, so a plain merge() would join on
# the return VALUES themselves and keep almost no rows; join on the index and
# disambiguate the columns with suffixes instead.
factor_returns_composite = factor_returns_1.join(
    factor_returns_2, how='inner', lsuffix='_1', rsuffix='_2')
In [ ]:
# Preview the combined factor-return frame.
factor_returns_composite.head()
In [ ]:
# Preview the first factor-return frame on its own for comparison.
factor_returns_1.head()