Notebook
In [1]:
# Reference (Quantopian forum post on Piotroski's F-Score algorithm):
# https://www.quantopian.com/posts/piotroskis-f-score-algorithm
In [2]:
import numpy as np
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.filters import QTradableStocksUS
from time import time
from sklearn import preprocessing
from scipy.stats.mstats import winsorize
In [3]:
import alphalens as al
In [4]:
# Fraction handed to winsorize() as both the lower and the upper limit inside
# preprocess(). With 0.0 nothing is cut from either tail, so winsorizing is
# effectively switched off; raise it (e.g. 0.025) to clip extreme values.
WIN_LIMIT = 0.0
In [5]:
# Lookup table from Morningstar sector code to a readable sector name.
# Passed to alphalens as `groupby_labels` so grouped output shows names
# instead of numeric codes. -1 is the catch-all bucket.
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}
In [6]:
def make_factors():
    """Build the alpha factors evaluated by this notebook.

    Returns:
        dict: factor name -> CustomFactor subclass (not yet instantiated;
        the caller instantiates each one with its own `mask`).
    """

    class Piotroski(CustomFactor):
        """Sum of nine binary fundamental signals, standardised by preprocess().

        Each signal compares either the newest row of an input against zero,
        or the newest row (index -1) against the oldest row (index 0) of the
        lookback window, and contributes 1 when the comparison holds.
        """

        inputs = [
            Fundamentals.roa,
            Fundamentals.operating_cash_flow,
            Fundamentals.cash_flow_from_continuing_operating_activities,
            Fundamentals.long_term_debt_equity_ratio,
            Fundamentals.current_ratio,
            Fundamentals.shares_outstanding,
            Fundamentals.gross_margin,
            Fundamentals.assets_turnover,
        ]

        # Number of rows in the lookback window (presumably trading days);
        # "change" signals compare the first and last row of this window.
        window_length = 100

        def compute(self, today, assets, out, roa, cash_flow, cash_flow_from_ops,
                    long_term_debt_ratio, current_ratio, shares_outstanding,
                    gross_margin, assets_turnover):
            """Write the standardised score for every asset into `out`."""

            def as_flag(condition):
                # Boolean array -> 0/1 integer array so signals can be summed.
                return condition.astype(int)

            # Profitability: four signals.
            # NOTE(review): the last one compares a raw cash-flow figure with
            # the ROA ratio, which mixes units -- confirm this is intended.
            profit = (
                as_flag(roa[-1] > 0)
                + as_flag(cash_flow[-1] > 0)
                + as_flag(roa[-1] > roa[0])
                + as_flag(cash_flow_from_ops[-1] > roa[-1])
            )

            # Leverage / liquidity / dilution: three signals.
            leverage = (
                as_flag(long_term_debt_ratio[-1] < long_term_debt_ratio[0])
                + as_flag(current_ratio[-1] > current_ratio[0])
                + as_flag(shares_outstanding[-1] <= shares_outstanding[0])
            )

            # Operating efficiency: two signals.
            operating = (
                as_flag(gross_margin[-1] > gross_margin[0])
                + as_flag(assets_turnover[-1] > assets_turnover[0])
            )

            # preprocess() is defined in a later cell; it is only looked up
            # when the pipeline actually runs this method.
            out[:] = preprocess(profit + leverage + operating)

    return {
        'Piotroski': Piotroski,
    }
In [7]:
def preprocess(a):
    """Centre, winsorize and standardise a 1-D array of raw factor values.

    Steps, in order:
      1. subtract the NaN-ignoring mean;
      2. replace any remaining NaN/inf via np.nan_to_num;
      3. winsorize both tails by WIN_LIMIT;
      4. rescale with sklearn's preprocessing.scale.

    Args:
        a: array of raw values for one cross-section.

    Returns:
        The array produced by preprocessing.scale.
    """
    demeaned = a - np.nanmean(a)
    cleaned = np.nan_to_num(demeaned)

    clipped = winsorize(cleaned, limits=[WIN_LIMIT, WIN_LIMIT])

    return preprocessing.scale(clipped)
In [8]:
# Restrict both the factor computation (mask) and the output rows (screen)
# to the same tradable universe.
universe = QTradableStocksUS()

factors = make_factors()

# Combined alpha is the plain sum of every factor returned by make_factors().
combined_alpha = None

# .items() behaves the same on Python 2 and 3 (iteritems() is Python-2-only).
for name, f in factors.items():
    # Identity test against None: the idiomatic singleton check, and it cannot
    # be affected by any operator overloading on pipeline terms.
    if combined_alpha is None:
        combined_alpha = f(mask=universe)
    else:
        combined_alpha = combined_alpha + f(mask=universe)

pipe = Pipeline(
    columns={
        'CombinedAlpha': combined_alpha,
        'Sector': Sector(),
    },
    screen=universe,
)

start_timer = time()
results = run_pipeline(pipe, '2010-01-01', '2018-10-19', chunksize=252)
end_timer = time()
# fillna() returns a new frame; the original statement discarded it, so the
# fill never took effect. Rebind the name so missing values really become 0.
results = results.fillna(value=0)

# Parenthesised single-argument form prints identically on Python 2 and 3.
print("Time to run pipeline %.2f secs" % (end_timer - start_timer))
Time to run pipeline 123.71 secs
In [9]:
# Split the pipeline output into the factor column and the sector column.
my_factor = results['CombinedAlpha']
sectors = results['Sector']

# Level 1 of the pipeline's MultiIndex is used as the list of assets to price
# (presumably level 0 is the date -- verify against run_pipeline's output).
asset_list = results.index.levels[1].unique()
prices = get_pricing(
    asset_list,
    start_date='2010-01-01',
    end_date='2018-10-19',
    fields='open_price',
)

# Forward-return horizons handed to alphalens.
periods = (1, 3, 5, 10, 21)

factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=my_factor,
    prices=prices,
    groupby=sectors,
    groupby_labels=MORNINGSTAR_SECTOR_CODES,
    periods=periods,
    quantiles=5,
)
Dropped 15.4% entries from factor data: 1.1% in forward returns computation and 14.3% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
In [10]:
# Per-date mean return (and standard error) of each factor quantile.
mean_return_by_q_daily, std_err_by_q_daily = al.performance.mean_return_by_quantile(
    factor_data,
    by_date=True,
)

# Mean return (and standard error) of each quantile, not split by group.
mean_return_by_q, std_err_by_q = al.performance.mean_return_by_quantile(
    factor_data,
    by_group=False,
)

# Information coefficient of the factor, as computed by alphalens.
ic = al.performance.factor_information_coefficient(factor_data)
In [11]:
# Render alphalens' information tear sheet (IC statistics table plus figures)
# for the factor data built above.
al.tears.create_information_tear_sheet(factor_data)
Information Analysis
1D 3D 5D 10D 21D
IC Mean 0.009 0.011 0.013 0.015 0.021
IC Std. 0.069 0.071 0.070 0.070 0.068
Risk-Adjusted IC 0.126 0.156 0.178 0.215 0.304
t-stat(IC) 5.456 6.747 7.714 9.274 13.146
p-value(IC) 0.000 0.000 0.000 0.000 0.000
IC Skew -0.050 0.008 -0.082 -0.196 -0.310
IC Kurtosis 0.209 0.141 0.123 0.298 0.725
<matplotlib.figure.Figure at 0x7efc0a82e050>