Notebook
In [60]:
import math
import numpy as np
import pandas as pd
import scipy as sp

'''User defined variables'''
N = 25 #Number of stocks in universe - Max 25 because of memmory limitations in Quantopian
K = 10 #Number of mean reverting stocks to select from universe.
'''End of user defined variables'''

fundamentals = init_fundamentals()

# At the moment we select universe based on sector code
# Ideally we should run a clustering algorithm on covariance/correlation to identify "similar" stocks

fundamental_df = get_fundamentals(query(fundamentals.valuation.market_cap)
        .filter(fundamentals.valuation.market_cap != None)
        .filter(fundamentals.asset_classification.morningstar_sector_code == 309)                          
        .filter(fundamentals.company_reference.primary_exchange_id != "OTCPK") # no pink sheets
        .filter(fundamentals.share_class_reference.security_type == 'ST00000001') # common stock only
        .filter(~fundamentals.share_class_reference.symbol.contains('_WI')) # drop when-issued
        .filter(fundamentals.share_class_reference.is_primary_share == True) # remove ancillary classes
        .filter(fundamentals.share_class_reference.is_depositary_receipt == False) # !ADR/GDR
        .order_by(fundamentals.valuation.market_cap.desc()), '2011-01-01'
        ).T
stocks = fundamental_df[0:N].index

prices = get_pricing(stocks,start_date='2009-09-01',end_date = '2010-12-1',fields='price',frequency='daily')
prices = prices.dropna(axis=1)
In [61]:
from cvxopt import matrix, spmatrix
from cvxopt.blas import dot
from cvxopt.solvers import sdp, conelp
from cvxopt import solvers

def getH(n, k, B):
    H = np.zeros((3*n*n+2, 1))
    return H
    
def getG(n, k, B):
    # we have n x n variables in Y
    # we introduct n x n new variables for L1 norm constraint
    # we have one more variable for z >= 0
    
    G = np.zeros((3*n*n+2, 2*n*n+1)) 

    # contraints for > 0 for auxillary variables
    for i in range(0, n*n):
        G[i, n*n+i] = -1.0
    
    # constraints for inequalities
    for i in range(0,n*n):
        G[n*n+i,i] = 1.0
        G[n*n+i,n*n+i] = -1.0
                
    # constraints for inequalities    
    for i in range(0, n*n):
        G[2*n*n+i,i] = -1.0
        G[2*n*n+i,n*n+i] = -1.0

    G[-2, n*n:2*n*n] = 1.
    G[-2, -1] = -k
    
    # constaints for z
    G[-1, -1] = -1.0
    return G

def getGs(n):
    A = np.zeros((n, n))
    Gs = np.zeros((2*n*n+1, n*n))
    
    for i in range(0, n*n):
        I = np.copy(A)
        I = np.asarray(np.ravel(I).tolist())
        I[i] = -1.0
        Gs[i,:] = I
    return Gs.T

def getHs(n):
    return np.zeros((n,n))
In [62]:
from sklearn.linear_model import MultiTaskLasso, LinearRegression
import statsmodels.api as smapi
from sklearn.decomposition import PCA

solvers.options['maxiters'] = 100
hist = prices.values[:-60, :]
Yt = hist[8:, :]
Yt_1 = hist[7:-1, :]
Yt_2 = hist[6:-2, :]
Yt_3 = hist[5:-3, :]
Yt_4 = hist[4:-4, :]
Yt_5 = hist[3:-5, :]
Yt_6 = hist[2:-6, :]
Yt_7 = hist[1:-7, :]
Yt_8 = hist[:-8, :]
X = PCA(.9).fit_transform(np.diff(Yt_1, axis=0))
X = np.hstack((X, np.diff(Yt_1, axis=0), 
               np.diff(Yt_2, axis=0), np.diff(Yt_3, axis=0), 
               np.diff(Yt_4, axis=0), np.diff(Yt_5, axis=0),
               np.diff(Yt_6, axis=0), np.diff(Yt_7, axis=0),
               np.diff(Yt_8, axis=0)))
R1 = LinearRegression(fit_intercept=False).fit(X, Yt[1:, :]).predict(X) - Yt[1:, :]
R2 = LinearRegression().fit(X, Yt_1[1:, :]).predict(X) - Yt_1[1:, :]
reg = LinearRegression(fit_intercept=False).fit(R2, R1)
Als = reg.coef_
R = reg.predict(R2)
B = np.dot(R.T, R) / np.shape(R)[0]

M = np.dot(Als, B)
M = np.dot(M, Als.T)
print np.shape(M)
(n,n) = np.shape(M)
c = np.ravel(M.T).tolist() # constants for Y i.e. n x n
c = c + ([0.] * (n*n))     # constants for new variables W
c.append(0.)               # constant for z
c = np.asarray(c) 

a1 = np.ravel(np.eye(n)).tolist() # for Y
a1 = a1 + ([0] * (n*n))           # for W
a1.append(-1.0)                   # for z

a2 = np.ravel(B.T).tolist()       # for Y
a2 = a2 + ([0] * (n*n+1))         # for W and z


Alist = [np.asarray(a1), np.asarray(a2)]

Buff = ([0] * (n*n))
Buff = matrix(Buff, (n,n))

# condition for symmetry
for i in range(0, n):
    for j in range(0, n):
        if i <> j and Buff[i,j] == 0:
            a = ([0] * (n*n))
            m = matrix(a, (n,n))
            Buff[i,j] = 1
            Buff[j,i] = 1
            m[i,j] = 1
            m[j,i] = -1
            a = np.ravel(m).tolist()
            a = a + ([0] * (n*n))
            a.append(0)
            Alist.append(np.asarray(a))
            
bm = [0., 1.]            

Buff = ([0] * (n*n))
Buff = matrix(Buff, (n,n))

for i in range(0, n):
    for j in range(0, n):
        if i <> j and Buff[i,j] == 0:
            bm.append(0)
            Buff[i,j] = 1
            Buff[j,i] = 1
            
A = np.matrix(Alist)
H = getH(n, K, B)
G = getG(n, K, B) 
Gs = getGs(n)
Hs = getHs(n)

#CVXOPT begins
cm = matrix(c, (len(c), 1))
hm = matrix(H)
Gm = matrix(G)
Gs = [matrix(Gs)]
hs = [matrix(Hs)]
Am = matrix(A)
bm = matrix(bm)        
sol = sdp(c=cm,Gl=Gm,hl=hm,Gs=Gs, hs=hs, A=Am,b=bm)
print sol['status']
(25, 25)
     pcost       dcost       gap    pres   dres   k/t
 0:  1.5017e+00  1.5017e+00  1e+04  4e+01  3e+00  1e+00
 1: -2.1732e-01  1.0196e+00  4e+04  7e+01  5e+00  3e+00
 2: -1.2621e+03 -5.5897e+01  2e+08  5e+03  3e+02  1e+03
 3: -7.0872e+01 -1.8915e+00  2e+05  1e+02  7e+00  7e+01
 4: -1.6787e+01 -4.1225e+00  5e+04  4e+01  3e+00  1e+01
 5: -3.3887e+00 -1.8117e+00  7e+03  1e+01  8e-01  2e+00
 6: -1.1007e+00 -4.6163e-01  8e+02  3e+00  2e-01  7e-01
 7:  3.4931e-02  1.5233e-01  9e+01  6e-01  4e-02  1e-01
 8:  3.8247e-01  4.4018e-01  2e+01  2e-01  1e-02  6e-02
 9:  3.8817e-01  4.2524e-01  1e+01  1e-01  7e-03  4e-02
10:  3.3779e-01  3.6263e-01  1e+01  7e-02  5e-03  3e-02
11:  2.8413e-01  2.9579e-01  7e+00  3e-02  2e-03  1e-02
12:  2.4573e-01  2.5214e-01  7e+00  2e-02  2e-03  7e-03
13:  1.9499e-01  1.9834e-01  4e+00  1e-02  8e-04  4e-03
14:  1.5650e-01  1.5890e-01  3e+00  7e-03  5e-04  3e-03
15:  1.3073e-01  1.3254e-01  3e+00  5e-03  4e-04  2e-03
16:  1.0906e-01  1.1024e-01  2e+00  3e-03  2e-04  1e-03
17:  9.4027e-02  9.4691e-02  1e+00  2e-03  1e-04  7e-04
18:  6.6321e-02  6.6722e-02  8e-01  9e-04  7e-05  4e-04
19:  5.5935e-02  5.6272e-02  8e-01  8e-04  6e-05  4e-04
20:  3.8973e-02  3.9186e-02  5e-01  4e-04  3e-05  2e-04
21:  3.7107e-02  3.7297e-02  5e-01  4e-04  3e-05  2e-04
22:  3.1309e-02  3.1467e-02  4e-01  3e-04  2e-05  2e-04
23:  2.0513e-02  2.0588e-02  2e-01  1e-04  1e-05  8e-05
24:  1.9862e-02  1.9928e-02  2e-01  1e-04  8e-06  7e-05
25:  1.4355e-02  1.4402e-02  1e-01  7e-05  5e-06  5e-05
26:  1.0073e-02  1.0101e-02  6e-02  3e-05  2e-06  3e-05
27:  1.0022e-02  1.0046e-02  5e-02  3e-05  2e-06  2e-05
28:  7.9564e-03  7.9721e-03  3e-02  2e-05  1e-06  2e-05
29:  6.2894e-03  6.2991e-03  2e-02  8e-06  6e-07  1e-05
30:  6.1225e-03  6.1328e-03  2e-02  8e-06  6e-07  1e-05
31:  4.9734e-03  4.9830e-03  2e-02  6e-06  5e-07  1e-05
32:  4.9081e-03  4.9173e-03  2e-02  6e-06  4e-07  9e-06
33:  3.9419e-03  3.9494e-03  2e-02  4e-06  3e-07  8e-06
34:  3.7809e-03  3.7878e-03  2e-02  4e-06  3e-07  7e-06
35:  3.0269e-03  3.0307e-03  1e-02  2e-06  1e-07  4e-06
36:  2.8213e-03  2.8247e-03  9e-03  1e-06  1e-07  4e-06
37:  2.5213e-03  2.5235e-03  6e-03  9e-07  6e-08  2e-06
38:  2.4103e-03  2.4122e-03  5e-03  8e-07  5e-08  2e-06
39:  2.1291e-03  2.1306e-03  4e-03  5e-07  4e-08  2e-06
40:  1.6846e-03  1.6857e-03  3e-03  3e-07  2e-08  1e-06
41:  1.6658e-03  1.6668e-03  2e-03  3e-07  2e-08  1e-06
42:  1.6148e-03  1.6156e-03  2e-03  2e-07  2e-08  9e-07
43:  1.4608e-03  1.4615e-03  2e-03  2e-07  1e-08  7e-07
44:  1.2978e-03  1.2982e-03  1e-03  1e-07  7e-09  4e-07
45:  1.3072e-03  1.3075e-03  8e-04  8e-08  6e-09  4e-07
46:  1.1927e-03  1.1929e-03  4e-04  4e-08  3e-09  2e-07
47:  1.1605e-03  1.1606e-03  3e-04  3e-08  2e-09  1e-07
48:  1.1358e-03  1.1359e-03  2e-04  2e-08  1e-09  1e-07
49:  1.1152e-03  1.1153e-03  1e-04  1e-08  9e-10  7e-08
50:  1.0867e-03  1.0867e-03  4e-05  4e-09  3e-10  2e-08
51:  1.0782e-03  1.0782e-03  1e-05  1e-09  7e-11  6e-09
52:  1.0755e-03  1.0755e-03  2e-06  1e-09  1e-11  1e-09
53:  1.0750e-03  1.0750e-03  5e-07  1e-07  3e-12  3e-10
54:  1.0749e-03  1.0749e-03  2e-07  9e-07  4e-12  1e-10
55:  1.0749e-03  1.0749e-03  7e-08  4e-06  1e-11  3e-11
56:  1.0749e-03  1.0749e-03  2e-08  8e-05  3e-10  8e-12
57:  1.0749e-03  1.0749e-03  3e-09  2e-03  1e-08  1e-12
58:  1.0749e-03  1.0733e-03  3e-10  7e-02  6e-07  2e-13
59:  1.0749e-03  1.0757e-03  2e-11  3e-01  6e-07  1e-14
60:  1.0749e-03  5.8552e-04  4e-12  5e+01  3e-04  2e-15
61:  1.0749e-03  2.0017e-04  3e-12  6e+01  4e-04  1e-15
62:  1.0749e-03  1.2929e-04  3e-12  8e+01  4e-04  1e-15
63:  1.0749e-03  6.7678e-04  3e-12  1e+02  2e-04  1e-15
64:  1.0749e-03  4.2956e-04  3e-12  1e+02  3e-04  1e-15
65:  1.0749e-03 -4.8434e-04  3e-12  1e+02  6e-04  1e-15
66:  1.0749e-03 -7.4556e-04  3e-12  1e+02  7e-04  1e-15
67:  1.0749e-03 -7.8526e-04  3e-12  1e+02  7e-04  1e-15
68:  1.0749e-03 -1.0390e-03  3e-12  2e+02  8e-04  1e-15
69:  1.0749e-03 -5.0928e-04  3e-12  2e+02  7e-04  1e-15
70:  1.0749e-03 -5.1095e-04  3e-12  2e+02  6e-04  1e-15
71:  1.0749e-03 -4.9816e-04  3e-12  2e+02  6e-04  1e-15
72:  1.0749e-03  1.5017e-03  3e-12  2e+02  3e-04  1e-15
73:  1.0749e-03  1.4919e-03  3e-12  2e+02  3e-04  1e-15
74:  1.0749e-03  3.0407e-03  3e-12  2e+02  1e-03  1e-15
75:  1.0749e-03  2.4707e-03  3e-12  3e+02  6e-04  1e-15
76:  1.0749e-03  2.2968e-03  4e-12  4e+02  1e-03  1e-15
77:  1.0749e-03  4.5406e-03  4e-12  4e+02  2e-03  1e-15
78:  1.0749e-03  4.7243e-03  5e-12  4e+02  2e-03  1e-15
79:  1.0749e-03  4.8245e-03  5e-12  4e+02  2e-03  1e-15
80:  1.0749e-03  5.2595e-03  5e-12  4e+02  2e-03  1e-15
81:  1.0749e-03  5.3353e-03  5e-12  4e+02  2e-03  1e-15
82:  1.0749e-03  5.4359e-03  5e-12  4e+02  2e-03  1e-15
83:  1.0749e-03  5.4441e-03  5e-12  4e+02  2e-03  1e-15
84:  1.0749e-03  5.4285e-03  5e-12  4e+02  2e-03  1e-15
85:  1.0749e-03  5.3337e-03  5e-12  4e+02  2e-03  1e-15
86:  1.0749e-03  5.3335e-03  5e-12  4e+02  2e-03  1e-15
87:  1.0749e-03  5.5941e-03  5e-12  4e+02  2e-03  1e-15
88:  1.0749e-03  5.5560e-03  5e-12  4e+02  2e-03  1e-15
89:  1.0749e-03  5.5092e-03  5e-12  4e+02  2e-03  1e-15
90:  1.0749e-03  5.4390e-03  5e-12  4e+02  2e-03  1e-15
91:  1.0749e-03  5.6819e-03  5e-12  4e+02  2e-03  1e-15
92:  1.0749e-03  6.1540e-03  5e-12  4e+02  3e-03  1e-15
93:  1.0749e-03  5.6272e-03  6e-12  3e+02  2e-03  1e-15
94:  1.0749e-03  8.2999e-03  7e-12  4e+02  3e-03  1e-15
95:  1.0749e-03  1.1237e-02  9e-12  4e+02  4e-03  1e-15
96:  1.0749e-03  1.1932e-02  9e-12  4e+02  4e-03  1e-15
97:  1.0749e-03  1.1850e-02  9e-12  4e+02  4e-03  1e-15
98:  1.0749e-03  9.7114e-03  1e-11  5e+02  4e-03  1e-15
99:  1.0749e-03  1.1588e-02  1e-11  5e+02  5e-03  1e-15
100:  1.0749e-03  8.6523e-03  1e-11  5e+02  3e-03  1e-15
Terminated (maximum number of iterations reached).
unknown
In [63]:
import scipy as sp
Y = matrix(sol['x'][0:(n*n)],(n,n))
z = sol['x'][-1]
evalue, evec =  sp.linalg.eig(Y)
idx = np.abs(evalue).argsort()[::-1]
W = np.ravel(evec[:, idx[0]].real)
In [64]:
import matplotlib.pyplot as plt
pvalue = np.dot(prices, W)
plt.plot(pvalue, color='b')
plt.plot(pvalue[:-60], color='r')
Out[64]:
[<matplotlib.lines.Line2D at 0x7f43616ec8d0>]