Notebook
In [11]:
#Imports
from quantopian.pipeline import CustomFactor, Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing

from quantopian.pipeline.factors import Returns, RollingLinearRegressionOfReturns
from quantopian.pipeline.factors import VWAP, AverageDollarVolume, SimpleMovingAverage

from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.factors import Factor

import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
In [12]:
#Custom Factors
        
class AverageDailyReturns(CustomFactor):
    """
    Mean of the day-over-day fractional price changes over the tail of the window.

    The first 6 rows of the lookback window are discarded, so with
    ``window_length=30`` the mean is taken over the 23 daily changes between
    the remaining 24 closes.
    """
    # Default inputs
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        """
        Write each asset's average daily return into ``out``.

        ``close`` is assumed to be a 2-D array with one row per day (oldest
        first) and one column per asset, as supplied by the pipeline engine
        -- TODO confirm against the CustomFactor documentation.
        """
        # NOTE(review): the reason for skipping the first 6 rows is not evident
        # from this notebook; kept as-is to preserve the factor's values.
        recent = close[6:]
        # Row i+1 divided by row i, minus 1 -> fractional change per day, per asset.
        daily_returns = recent[1:] / recent[:-1] - 1
        # Average over the day axis, leaving one value per asset column.
        out[:] = np.mean(daily_returns, axis=0)
In [17]:
#Custom Pipeline

def make_pipeline():
    """
    Build a Pipeline of two return factors screened to a tradable universe.

    The screen is the conjunction of 8 filters:
        1. common stock
        2 & 3. not limited partnership - name and database check
        4. database has fundamental data
        5. not over the counter
        6. not when issued
        7. not depository receipts
        8. primary share

    Returns:
        Pipeline with the columns 'Average Daily Returns' and 'Monthly Return',
        screened by all 8 filters above.
    """
    common_stock = morningstar.share_class_reference.security_type.latest.eq('ST00000001')
    # Raw string: same pattern as before, without relying on an invalid '\.' escape.
    not_lp_name = ~morningstar.company_reference.standard_name.latest.matches(r'.* L[\. ]?P\.?$')
    not_lp_balance_sheet = morningstar.balance_sheet.limited_partnership.latest.isnull()
    have_data = morningstar.valuation.market_cap.latest.notnull()
    not_otc = ~morningstar.share_class_reference.exchange_id.latest.startswith('OTC')
    not_wi = ~morningstar.share_class_reference.symbol.latest.endswith('.WI')
    not_depository = ~morningstar.share_class_reference.is_depositary_receipt.latest
    primary_share = IsPrimaryShare()

    # Combine the above filters.
    tradable_filter = (common_stock & not_lp_name & not_lp_balance_sheet & have_data
                       & not_otc & not_wi & not_depository & primary_share)

    return Pipeline(
        columns={
            'Average Daily Returns': AverageDailyReturns(window_length=30),
            # NOTE(review): despite the label, this is a 7-row Returns window,
            # not a calendar month. The key is left unchanged because it is the
            # column name of the resulting DataFrame.
            'Monthly Return': Returns(window_length=7),
        },
        screen=tradable_filter,
    )
In [20]:
# Run the pipeline for a single day and drop every security that has a
# missing value in either column.
my_pipe = make_pipeline()
data = run_pipeline(my_pipe, '2016-08-08', '2016-08-08').dropna()
print('Number of securities that passed the filter: %d' % len(data))

# Select by column name rather than by position, so the feature/target split
# does not depend on the order in which the pipeline columns come back.
inputs = data[['Average Daily Returns']].values  # 2-D: one single-feature row per security
outputs = data['Monthly Return'].values          # 1-D: one target value per security
print(inputs)
print(outputs)

data
Number of securities that passed the filter: 3849
[[ 0.00578133]
 [ 0.00572199]
 [ 0.00176213]
 ..., 
 [ 0.00391306]
 [ 0.0020918 ]
 [ 0.00178416]]
[-0.01444902  0.03557728 -0.00784393 ...,  0.04918033 -0.02203857
  0.00305882]
Out[20]:
Average Daily Returns Monthly Return
2016-08-08 00:00:00+00:00 Equity(2 [AA]) 0.005781 -0.014449
Equity(24 [AAPL]) 0.005722 0.035577
Equity(31 [ABAX]) 0.001762 -0.007844
Equity(41 [ARCB]) 0.007770 0.042363
Equity(52 [ABM]) 0.000967 0.001074
Equity(53 [ABMD]) 0.004019 0.045391
Equity(62 [ABT]) 0.006295 0.018227
Equity(67 [ADSK]) 0.005180 -0.002019
Equity(69 [ACAT]) -0.002761 -0.146061
Equity(76 [TAP]) -0.000364 0.013504
Equity(84 [ACET]) 0.005160 -0.006696
Equity(110 [ACXM]) 0.006102 0.092572
Equity(114 [ADBE]) 0.001980 0.006300
Equity(117 [AEY]) 0.002816 -0.035354
Equity(122 [ADI]) 0.006180 -0.008407
Equity(128 [ADM]) 0.002592 -0.011346
Equity(161 [AEP]) -0.001836 -0.015670
Equity(162 [AEPI]) -0.002265 -0.055695
Equity(166 [AES]) 0.000547 -0.001634
Equity(168 [AET]) 0.000019 0.029016
Equity(185 [AFL]) 0.000912 -0.003547
Equity(197 [AGCO]) 0.002199 0.010008
Equity(216 [HES]) -0.001467 0.047154
Equity(225 [AHPI]) 0.002520 -0.046875
Equity(239 [AIG]) 0.006388 0.078383
Equity(247 [AIN]) 0.004304 0.016057
Equity(253 [AIR]) 0.003747 0.021366
Equity(266 [AJG]) 0.001591 -0.002226
Equity(270 [AKRX]) 0.002914 -0.091269
Equity(283 [ALCO]) -0.002101 -0.048016
... ... ...
Equity(49885 [LMCA]) 0.006224 0.017658
Equity(49891 [MGP]) -0.000118 -0.010448
Equity(49894 [ARA]) -0.004719 -0.049146
Equity(49895 [QHC]) -0.003952 -0.096627
Equity(49896 [SCWX]) 0.001500 -0.038848
Equity(49908 [RRR]) 0.001926 -0.008293
Equity(49920 [GWRS]) -0.006627 -0.089180
Equity(49925 [NGVT]) 0.010348 0.130344
Equity(49934 [NTLA]) 0.001681 0.063191
Equity(49938 [SBPH]) 0.007835 0.190450
Equity(49954 [TPB]) 0.002920 -0.021583
Equity(49959 [SITE]) 0.002248 -0.034917
Equity(49989 [MSBI]) 0.001506 0.015801
Equity(49993 [CLSD]) -0.004029 -0.052067
Equity(49995 [RETA]) -0.004065 -0.008663
Equity(50002 [COTV]) 0.005745 0.004593
Equity(50003 [USFD]) -0.000787 -0.021461
Equity(50004 [GMS]) 0.003435 -0.069521
Equity(50005 [ZDGE]) -0.002889 0.017157
Equity(50013 [MBRX]) -0.001647 -0.056259
Equity(50015 [NH]) -0.012379 -0.007874
Equity(50040 [ATKR]) 0.000775 0.053266
Equity(50041 [VIVE]) 0.021926 0.226277
Equity(50049 [FTV]) 0.001644 0.037775
Equity(50068 [SELB]) -0.006860 0.108298
Equity(50077 [TWLO]) 0.009725 -0.014031
Equity(50091 [SYRS]) -0.024064 -0.154519
Equity(50096 [HONE]) 0.003913 0.049180
Equity(50099 [GMRE]) 0.002092 -0.022039
Equity(50103 [RNDB]) 0.001784 0.003059

3849 rows × 2 columns

In [19]:
# Features and targets come from the pipeline cell (`inputs` / `outputs`).
X = inputs
y = outputs
print(X)
print(y)

# Fit regression models
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e3)

svr_rbf.fit(X, y)
svr_lin.fit(X, y)

y_rbf = svr_rbf.predict(X)
y_lin = svr_lin.predict(X)

# Draw the fitted curves in ascending-X order; plotting in the raw row order
# would connect unrelated points and produce a zig-zag instead of a curve.
order = np.argsort(X[:, 0])

# look at the results
plt.scatter(X, y, c='k', label='data')
plt.plot(X[order], y_rbf[order], c='g', label='RBF model')
plt.plot(X[order], y_lin[order], c='r', label='Linear model')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Support Vector Regression')
plt.legend()
plt.show()

# Correlation between the RBF model's in-sample predictions and the targets.
print('Built-in Correlation: ' + str(np.corrcoef(y_rbf, y)[0, 1]))
[[ 0.00578133]
 [ 0.00572199]
 [ 0.00176213]
 ..., 
 [ 0.00391306]
 [ 0.0020918 ]
 [ 0.00178416]]
[-0.01444902  0.03557728 -0.00784393 ...,  0.04918033 -0.02203857
  0.00305882]
Built-in Correlation: 0.556510325603