Notebook
In [159]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume
from quantopian.pipeline.classifiers.morningstar import Sector

import math
import datetime
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
In [160]:
def run_pipeline_chunks(pipe, start_date, end_date, chunks_len = None):
    """
    Drop-in replacement for run_pipeline.

    run_pipeline fails over a very long period of time (memory usage),
    so the date range is split into chunks, the pipeline is run once per
    chunk and the per-chunk results are concatenated.

    Parameters
    ----------
    pipe
        Pipeline object, passed unchanged to run_pipeline.
    start_date, end_date
        Bounds of the period to run; anything pd.Timestamp accepts.
    chunks_len : pd.Timedelta, optional
        Length of each chunk. Defaults to 26 weeks.

    Returns
    -------
    The concatenation (pd.concat) of the per-chunk results, in run order.

    Raises
    ------
    ValueError
        Raised by pd.concat when no chunk was run at all
        (start_date later than end_date).
    """
    chunks  = []
    current = pd.Timestamp(start_date)
    end     = pd.Timestamp(end_date)
    step    = pd.Timedelta(weeks=26) if chunks_len is None else chunks_len
    one_day = pd.Timedelta(days=1)

    while current <= end:

        # never request past the overall end date
        current_end = min(current + step, end)

        print('Running pipeline: %s  -  %s' % (current, current_end))
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
        chunks.append(results)

        if len(results.index) == 0:
            # Nothing came back for this window, so there is no "last date"
            # to read from the index: just move past the requested window.
            current = current_end + one_day
            continue

        # pipeline bug: it returns more days than requested, so get last date from the results
        last_date_returned = results.index.get_level_values(0)[-1].tz_localize(None)
        current = last_date_returned + one_day

        if last_date_returned > current_end:
            print('pipeline bug it returns more days than requested: last date returned  %s' % (last_date_returned,))

    # Leave empty chunks out of the concatenation so they cannot influence the
    # result dtypes; if every chunk is empty, fall back to concatenating them
    # as they are (an empty `chunks` list still raises, as before).
    non_empty = [chunk for chunk in chunks if len(chunk.index) > 0]
    return pd.concat(non_empty if non_empty else chunks)
In [161]:
# Number of most-liquid assets kept in the universe. It must be defined before
# the pipeline is built: .top() receives the value when this cell runs, so
# assigning `top_liquid` in a later cell does not change `pipe`.
# NOTE(review): 500 is presumably the value the recorded outputs were produced
# with (the 94500-row result further down is consistent with 500 assets per
# day) - confirm.
PIPE_TOP_LIQUID = 500

# Universe: top PIPE_TOP_LIQUID assets by AverageDollarVolume over a 20-bar window.
universe = AverageDollarVolume(window_length=20).top(PIPE_TOP_LIQUID)

# Two columns, both masked by and screened on the universe.
pipe = Pipeline(
    columns={
        'factor' : Returns(mask=universe, window_length=2),
        'sector' : Sector(mask=universe),
    },
    screen=universe
)

Bug 1: run_pipeline returns more days than requested

In [162]:
# Short period split into 3-day chunks: many small requests, so the printed
# log shows each time run_pipeline returns a date past the requested end.
start_date, end_date = '2015-06-30', '2015-08-24'
# NOTE(review): `pipe` is built in an earlier cell; confirm whether this
# assignment is expected to affect it.
top_liquid = 3

three_days = pd.Timedelta(days=3)
dummy = run_pipeline_chunks(pipe, start_date, end_date, chunks_len=three_days)
Running pipeline: 2015-06-30 00:00:00  -  2015-07-03 00:00:00
pipeline bug it returns more days than requested: last date returned  2015-07-06 00:00:00
Running pipeline: 2015-07-07 00:00:00  -  2015-07-10 00:00:00
Running pipeline: 2015-07-11 00:00:00  -  2015-07-14 00:00:00
Running pipeline: 2015-07-15 00:00:00  -  2015-07-18 00:00:00
pipeline bug it returns more days than requested: last date returned  2015-07-20 00:00:00
Running pipeline: 2015-07-21 00:00:00  -  2015-07-24 00:00:00
Running pipeline: 2015-07-25 00:00:00  -  2015-07-28 00:00:00
Running pipeline: 2015-07-29 00:00:00  -  2015-08-01 00:00:00
pipeline bug it returns more days than requested: last date returned  2015-08-03 00:00:00
Running pipeline: 2015-08-04 00:00:00  -  2015-08-07 00:00:00
Running pipeline: 2015-08-08 00:00:00  -  2015-08-11 00:00:00
Running pipeline: 2015-08-12 00:00:00  -  2015-08-15 00:00:00
pipeline bug it returns more days than requested: last date returned  2015-08-17 00:00:00
Running pipeline: 2015-08-18 00:00:00  -  2015-08-21 00:00:00
Running pipeline: 2015-08-22 00:00:00  -  2015-08-24 00:00:00

Bug 2: run_pipeline returns Sector code '-1' when run over certain date ranges

In [163]:
# Period and universe size used for the chunked vs. single-run comparison.
start_date, end_date = '2015-08-30', '2016-05-30'
top_liquid = 500
In [164]:
# Same pipeline and dates, run twice: p1 in chunks, p2 in one single call.
p1 = run_pipeline_chunks(pipe, start_date=start_date, end_date=end_date)
p2 = run_pipeline(pipe, start_date, end_date)
Running pipeline: 2015-08-30 00:00:00  -  2016-02-28 00:00:00
pipeline bug it returns more days than requested: last date returned  2016-02-29 00:00:00
Running pipeline: 2016-03-01 00:00:00  -  2016-05-30 00:00:00
pipeline bug it returns more days than requested: last date returned  2016-05-31 00:00:00
In [165]:
# Element-wise difference between the chunked (p1) and single-run (p2) results.
diff = p1 - p2
# Rows where at least one column of the difference is non-zero.
differs = diff.any(axis=1)

print('mean  %s std  %s' % (diff.mean(), diff.std()))

# The disagreeing rows as seen by each run.
print(p1[differs])
print(p2[differs])

# Compare the two indexes: size, then first/last entry of each level.
dates1, dates2 = p1.index.get_level_values(0), p2.index.get_level_values(0)
assets1, assets2 = p1.index.get_level_values(1), p2.index.get_level_values(1)

print('%s - %s' % (len(dates1), len(dates2)))
print('first %s - %s' % (dates1[0], dates2[0]))
print('last %s - %s' % (dates1[-1], dates2[-1]))

print('first %s - %s' % (assets1[0], assets2[0]))
print('last %s - %s' % (assets1[-1], assets2[-1]))
mean  factor    0.000000
sector   -0.069757
dtype: float64 std  factor    0.000000
sector    2.679579
dtype: float64
                                                factor  sector
2016-03-01 00:00:00+00:00 Equity(40430 [GM]) -0.006581      -1
2016-03-02 00:00:00+00:00 Equity(40430 [GM])  0.019535      -1
2016-03-03 00:00:00+00:00 Equity(40430 [GM])  0.004332      -1
2016-03-04 00:00:00+00:00 Equity(40430 [GM])  0.030524      -1
2016-03-07 00:00:00+00:00 Equity(40430 [GM])  0.009981      -1
2016-03-08 00:00:00+00:00 Equity(40430 [GM])  0.006694      -1
2016-03-09 00:00:00+00:00 Equity(40430 [GM]) -0.028816      -1
2016-03-10 00:00:00+00:00 Equity(40430 [GM])  0.007923      -1
2016-03-11 00:00:00+00:00 Equity(40430 [GM])  0.009499      -1
2016-03-14 00:00:00+00:00 Equity(40430 [GM])  0.013952      -1
2016-03-15 00:00:00+00:00 Equity(40430 [GM]) -0.002560      -1
2016-03-16 00:00:00+00:00 Equity(40430 [GM]) -0.003850      -1
2016-03-17 00:00:00+00:00 Equity(40430 [GM])  0.020934      -1
2016-03-18 00:00:00+00:00 Equity(40430 [GM])  0.007256      -1
2016-03-21 00:00:00+00:00 Equity(40430 [GM])  0.000940      -1
2016-03-22 00:00:00+00:00 Equity(40430 [GM])  0.003755      -1
2016-03-23 00:00:00+00:00 Equity(40430 [GM]) -0.005299      -1
2016-03-24 00:00:00+00:00 Equity(40430 [GM]) -0.019116      -1
2016-03-28 00:00:00+00:00 Equity(40430 [GM]) -0.010863      -1
2016-03-29 00:00:00+00:00 Equity(40430 [GM])  0.001938      -1
2016-03-30 00:00:00+00:00 Equity(40430 [GM])  0.005803      -1
2016-03-31 00:00:00+00:00 Equity(40430 [GM]) -0.006410      -1
2016-04-01 00:00:00+00:00 Equity(40430 [GM])  0.013548      -1
2016-04-04 00:00:00+00:00 Equity(40430 [GM]) -0.030395      -1
2016-04-05 00:00:00+00:00 Equity(40430 [GM]) -0.018546      -1
2016-04-06 00:00:00+00:00 Equity(40430 [GM]) -0.010033      -1
2016-04-07 00:00:00+00:00 Equity(40430 [GM])  0.011486      -1
2016-04-08 00:00:00+00:00 Equity(40430 [GM]) -0.017702      -1
2016-04-11 00:00:00+00:00 Equity(40430 [GM]) -0.001700      -1
2016-04-12 00:00:00+00:00 Equity(40430 [GM])  0.006131      -1
...                                                ...     ...
2016-04-19 00:00:00+00:00 Equity(40430 [GM])  0.024877      -1
2016-04-20 00:00:00+00:00 Equity(40430 [GM])  0.021080      -1
2016-04-21 00:00:00+00:00 Equity(40430 [GM])  0.006850      -1
2016-04-22 00:00:00+00:00 Equity(40430 [GM])  0.014322      -1
2016-04-25 00:00:00+00:00 Equity(40430 [GM]) -0.014395      -1
2016-04-26 00:00:00+00:00 Equity(40430 [GM]) -0.007147      -1
2016-04-27 00:00:00+00:00 Equity(40430 [GM])  0.007825      -1
2016-04-28 00:00:00+00:00 Equity(40430 [GM]) -0.001273      -1
2016-04-29 00:00:00+00:00 Equity(40430 [GM])  0.009049      -1
2016-05-02 00:00:00+00:00 Equity(40430 [GM]) -0.020339      -1
2016-05-03 00:00:00+00:00 Equity(40430 [GM]) -0.001101      -1
2016-05-04 00:00:00+00:00 Equity(40430 [GM]) -0.016218      -1
2016-05-05 00:00:00+00:00 Equity(40430 [GM]) -0.020807      -1
2016-05-06 00:00:00+00:00 Equity(40430 [GM]) -0.001961      -1
2016-05-09 00:00:00+00:00 Equity(40430 [GM])  0.013757      -1
2016-05-10 00:00:00+00:00 Equity(40430 [GM]) -0.005170      -1
2016-05-11 00:00:00+00:00 Equity(40430 [GM])  0.018188      -1
2016-05-12 00:00:00+00:00 Equity(40430 [GM]) -0.008931      -1
2016-05-13 00:00:00+00:00 Equity(40430 [GM])  0.003219      -1
2016-05-16 00:00:00+00:00 Equity(40430 [GM]) -0.021335      -1
2016-05-17 00:00:00+00:00 Equity(40430 [GM])  0.005737      -1
2016-05-18 00:00:00+00:00 Equity(40430 [GM]) -0.001956      -1
2016-05-19 00:00:00+00:00 Equity(40430 [GM])  0.004246      -1
2016-05-20 00:00:00+00:00 Equity(40430 [GM]) -0.013659      -1
2016-05-23 00:00:00+00:00 Equity(40430 [GM])  0.007254      -1
2016-05-24 00:00:00+00:00 Equity(40430 [GM])  0.001637      -1
2016-05-25 00:00:00+00:00 Equity(40430 [GM])  0.014706      -1
2016-05-26 00:00:00+00:00 Equity(40430 [GM])  0.019002      -1
2016-05-27 00:00:00+00:00 Equity(40430 [GM]) -0.010904      -1
2016-05-31 00:00:00+00:00 Equity(40430 [GM])  0.002397      -1

[64 rows x 2 columns]
                                                factor  sector
2016-03-01 00:00:00+00:00 Equity(40430 [GM]) -0.006581     102
2016-03-02 00:00:00+00:00 Equity(40430 [GM])  0.019535     102
2016-03-03 00:00:00+00:00 Equity(40430 [GM])  0.004332     102
2016-03-04 00:00:00+00:00 Equity(40430 [GM])  0.030524     102
2016-03-07 00:00:00+00:00 Equity(40430 [GM])  0.009981     102
2016-03-08 00:00:00+00:00 Equity(40430 [GM])  0.006694     102
2016-03-09 00:00:00+00:00 Equity(40430 [GM]) -0.028816     102
2016-03-10 00:00:00+00:00 Equity(40430 [GM])  0.007923     102
2016-03-11 00:00:00+00:00 Equity(40430 [GM])  0.009499     102
2016-03-14 00:00:00+00:00 Equity(40430 [GM])  0.013952     102
2016-03-15 00:00:00+00:00 Equity(40430 [GM]) -0.002560     102
2016-03-16 00:00:00+00:00 Equity(40430 [GM]) -0.003850     102
2016-03-17 00:00:00+00:00 Equity(40430 [GM])  0.020934     102
2016-03-18 00:00:00+00:00 Equity(40430 [GM])  0.007256     102
2016-03-21 00:00:00+00:00 Equity(40430 [GM])  0.000940     102
2016-03-22 00:00:00+00:00 Equity(40430 [GM])  0.003755     102
2016-03-23 00:00:00+00:00 Equity(40430 [GM]) -0.005299     102
2016-03-24 00:00:00+00:00 Equity(40430 [GM]) -0.019116     102
2016-03-28 00:00:00+00:00 Equity(40430 [GM]) -0.010863     102
2016-03-29 00:00:00+00:00 Equity(40430 [GM])  0.001938     102
2016-03-30 00:00:00+00:00 Equity(40430 [GM])  0.005803     102
2016-03-31 00:00:00+00:00 Equity(40430 [GM]) -0.006410     102
2016-04-01 00:00:00+00:00 Equity(40430 [GM])  0.013548     102
2016-04-04 00:00:00+00:00 Equity(40430 [GM]) -0.030395     102
2016-04-05 00:00:00+00:00 Equity(40430 [GM]) -0.018546     102
2016-04-06 00:00:00+00:00 Equity(40430 [GM]) -0.010033     102
2016-04-07 00:00:00+00:00 Equity(40430 [GM])  0.011486     102
2016-04-08 00:00:00+00:00 Equity(40430 [GM]) -0.017702     102
2016-04-11 00:00:00+00:00 Equity(40430 [GM]) -0.001700     102
2016-04-12 00:00:00+00:00 Equity(40430 [GM])  0.006131     102
...                                                ...     ...
2016-04-19 00:00:00+00:00 Equity(40430 [GM])  0.024877     102
2016-04-20 00:00:00+00:00 Equity(40430 [GM])  0.021080     102
2016-04-21 00:00:00+00:00 Equity(40430 [GM])  0.006850     102
2016-04-22 00:00:00+00:00 Equity(40430 [GM])  0.014322     102
2016-04-25 00:00:00+00:00 Equity(40430 [GM]) -0.014395     102
2016-04-26 00:00:00+00:00 Equity(40430 [GM]) -0.007147     102
2016-04-27 00:00:00+00:00 Equity(40430 [GM])  0.007825     102
2016-04-28 00:00:00+00:00 Equity(40430 [GM]) -0.001273     102
2016-04-29 00:00:00+00:00 Equity(40430 [GM])  0.009049     102
2016-05-02 00:00:00+00:00 Equity(40430 [GM]) -0.020339     102
2016-05-03 00:00:00+00:00 Equity(40430 [GM]) -0.001101     102
2016-05-04 00:00:00+00:00 Equity(40430 [GM]) -0.016218     102
2016-05-05 00:00:00+00:00 Equity(40430 [GM]) -0.020807     102
2016-05-06 00:00:00+00:00 Equity(40430 [GM]) -0.001961     102
2016-05-09 00:00:00+00:00 Equity(40430 [GM])  0.013757     102
2016-05-10 00:00:00+00:00 Equity(40430 [GM]) -0.005170     102
2016-05-11 00:00:00+00:00 Equity(40430 [GM])  0.018188     102
2016-05-12 00:00:00+00:00 Equity(40430 [GM]) -0.008931     102
2016-05-13 00:00:00+00:00 Equity(40430 [GM])  0.003219     102
2016-05-16 00:00:00+00:00 Equity(40430 [GM]) -0.021335     102
2016-05-17 00:00:00+00:00 Equity(40430 [GM])  0.005737     102
2016-05-18 00:00:00+00:00 Equity(40430 [GM]) -0.001956     102
2016-05-19 00:00:00+00:00 Equity(40430 [GM])  0.004246     102
2016-05-20 00:00:00+00:00 Equity(40430 [GM]) -0.013659     102
2016-05-23 00:00:00+00:00 Equity(40430 [GM])  0.007254     102
2016-05-24 00:00:00+00:00 Equity(40430 [GM])  0.001637     102
2016-05-25 00:00:00+00:00 Equity(40430 [GM])  0.014706     102
2016-05-26 00:00:00+00:00 Equity(40430 [GM])  0.019002     102
2016-05-27 00:00:00+00:00 Equity(40430 [GM]) -0.010904     102
2016-05-31 00:00:00+00:00 Equity(40430 [GM])  0.002397     102

[64 rows x 2 columns]
94500 - 94500
first 2015-08-31 00:00:00+00:00 - 2015-08-31 00:00:00+00:00
last 2016-05-31 00:00:00+00:00 - 2016-05-31 00:00:00+00:00
first Equity(2 [AA]) - Equity(2 [AA])
last Equity(49506 [HPE]) - Equity(49506 [HPE])