Notebook
In [199]:
import math
import numpy as np
import pandas as pd
import scipy as sp

fundamentals = init_fundamentals()

fundamental_df = get_fundamentals(query(fundamentals.valuation.market_cap)
        .filter(fundamentals.valuation.market_cap != None)
        .filter(fundamentals.company_reference.primary_exchange_id != "OTCPK") # no pink sheets
        .filter(fundamentals.share_class_reference.security_type == 'ST00000001') # common stock only
        .filter(~fundamentals.share_class_reference.symbol.contains('_WI')) # drop when-issued
        .filter(fundamentals.share_class_reference.is_primary_share == True) # remove ancillary classes
        .filter(fundamentals.share_class_reference.is_depositary_receipt == False) # !ADR/GDR
        .order_by(fundamentals.valuation.market_cap.desc()), '2012-01-01'
        ).T
stocks = fundamental_df[0:25].index

prices = get_pricing(stocks,start_date='2009-06-01',end_date = '2012-12-31',fields='price',frequency='daily')
prices = prices.dropna(axis=1)
In [208]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.covariance import EmpiricalCovariance

mu = []
sig = []
BlockSize = 4 * np.shape(prices.values)[1]

insample = prices.values[:-60, :]

if np.shape(insample)[0] // BlockSize < 15:
    print np.shape(insample)[0]
    print BlockSize * 15
    print "ERROR - insufficient data"

for i in range(0, np.shape(insample)[0], BlockSize):
    x = insample[i:i+BlockSize, :]
    mu.append(np.mean(x, axis=0))
    sig.append(EmpiricalCovariance().fit(x).covariance_)
    
mu = np.asarray(mu)
sig = np.asarray(sig)

Mu = np.mean(mu, axis=0)
N = np.shape(sig)[0]
Sig = np.zeros(np.shape(sig[0, :, :]))

for i in range(0, N):
    Sig += sig[i]

Sig /= N * 1.
invSig = np.linalg.inv(Sig)
S = np.zeros(np.shape(Sig))

for i in range(0, N):
    S = S + np.outer(mu[i], mu[i]) + 2 * np.dot(sig[i],np.dot(invSig, sig[i]))
S = S / N - np.outer(Mu,Mu) - 2 * Sig;
phi,lambdas = sp.linalg.eig(S, Sig)
idx = phi.argsort()
stat = lambdas[:, idx[0]]
844
1500
ERROR - insufficient data
In [215]:
import matplotlib.pyplot as plt
from statsmodels.tsa import stattools as st
insampleportfolio = np.dot(prices.values, stat)
plt.plot(insampleportfolio, color='b')
outofsampleportfolio = np.dot(prices.values[:-60, :], stat)
plt.plot(outofsampleportfolio, color='r')

print st.adfuller(outofsampleportfolio[-90:])
(-3.1563111406134854, 0.02265697745221611, 1, 88, {'5%': -2.8949898192148762, '1%': -3.5069444018242861, '10%': -2.5846145506198348}, 30.973979214006079)
In [ ]: