Notebook
In [46]:
import quantopian.research as qr
import numpy as np
import matplotlib.pyplot as plt
import cvxopt as opt
from cvxopt import blas, solvers
import pandas as pd
In [54]:
#Specify the assets and the timeframe
stocks = ['AAPL','MSFT','AMZN','JNJ', 'XOM','IBM','JPM','WFC','BAC','GE',
         'WMT','PG','BERK','ORCL','PFE','CVX','KO','CMCSA','C','HD','UNH','VZ','DIS',
         'AGG','EFA','GLD','VNQ']
start_dt = '2007-09-01'
end_dt = '2017-09-01'
In [55]:
#Get Prices
price_data = qr.prices(stocks,start_dt,end_dt,frequency='daily',price_field='price')
price_data.head()
Out[55]:
Equity(24 [AAPL]) Equity(5061 [MSFT]) Equity(16841 [AMZN]) Equity(4151 [JNJ]) Equity(8347 [XOM]) Equity(3766 [IBM]) Equity(25006 [JPM]) Equity(8151 [WFC]) Equity(700 [BAC]) Equity(3149 [GE]) ... Equity(1637 [CMCS_A]) Equity(1335 [C]) Equity(3496 [HD]) Equity(7792 [UNH]) Equity(21839 [VZ]) Equity(2190 [DIS]) Equity(25485 [AGG]) Equity(22972 [EFA]) Equity(26807 [GLD]) Equity(26669 [VNQ])
2007-09-04 00:00:00+00:00 18.529 22.498 82.75 45.970 66.416 93.527 35.794 28.207 43.015 27.198 ... 11.009 429.084 27.830 44.746 24.173 29.692 78.800 61.170 67.50 47.417
2007-09-05 00:00:00+00:00 17.581 22.232 83.65 45.688 66.401 93.337 34.846 27.621 42.572 26.975 ... 10.929 417.993 27.991 43.935 23.714 29.588 79.117 60.119 67.54 46.426
2007-09-06 00:00:00+00:00 17.351 22.591 86.22 45.681 66.584 93.084 34.925 27.467 42.452 27.484 ... 10.882 414.993 27.093 44.193 23.935 29.962 79.022 60.498 68.90 46.689
2007-09-07 00:00:00+00:00 16.941 22.217 84.53 45.718 65.206 91.453 34.372 27.113 41.787 27.010 ... 10.619 413.448 26.265 43.810 23.375 29.231 79.514 59.633 69.39 45.745
2007-09-10 00:00:00+00:00 17.573 22.248 83.36 45.807 64.590 91.643 34.657 27.036 41.727 27.260 ... 10.666 411.175 25.912 43.792 23.347 29.179 79.705 59.425 69.60 45.321

5 rows × 27 columns

In [56]:
#Get Returns
return_data = qr.returns(stocks,start_dt,end_dt,periods=1,frequency='daily',price_field='price')
return_data.head()
Out[56]:
Equity(24 [AAPL]) Equity(5061 [MSFT]) Equity(16841 [AMZN]) Equity(4151 [JNJ]) Equity(8347 [XOM]) Equity(3766 [IBM]) Equity(25006 [JPM]) Equity(8151 [WFC]) Equity(700 [BAC]) Equity(3149 [GE]) ... Equity(1637 [CMCS_A]) Equity(1335 [C]) Equity(3496 [HD]) Equity(7792 [UNH]) Equity(21839 [VZ]) Equity(2190 [DIS]) Equity(25485 [AGG]) Equity(22972 [EFA]) Equity(26807 [GLD]) Equity(26669 [VNQ])
2007-09-04 00:00:00+00:00 0.041189 0.004196 0.036059 0.005842 0.021172 0.013118 0.019801 0.002737 0.008085 0.003876 ... -0.005421 0.009194 -0.054206 0.006003 0.021509 0.017267 -0.005214 0.007892 0.013970 0.012967
2007-09-05 00:00:00+00:00 -0.051163 -0.011823 0.010876 -0.006134 -0.000226 -0.002031 -0.026485 -0.020775 -0.010299 -0.008199 ... -0.007267 -0.025848 0.005785 -0.018125 -0.018988 -0.003503 0.004023 -0.017182 0.000593 -0.020900
2007-09-06 00:00:00+00:00 -0.013082 0.016148 0.030723 -0.000153 0.002756 -0.002711 0.002267 -0.005575 -0.002819 0.018869 ... -0.004300 -0.007177 -0.032082 0.005872 0.009319 0.012640 -0.001201 0.006304 0.020136 0.005665
2007-09-07 00:00:00+00:00 -0.023630 -0.016555 -0.019601 0.000810 -0.020696 -0.017522 -0.015834 -0.012888 -0.015665 -0.017246 ... -0.024168 -0.003723 -0.030561 -0.008667 -0.023397 -0.024398 0.006226 -0.014298 0.007112 -0.020219
2007-09-10 00:00:00+00:00 0.037306 0.001395 -0.013841 0.001947 -0.009447 0.002078 0.008292 -0.002840 -0.001436 0.009256 ... 0.004426 -0.005498 -0.013440 -0.000411 -0.001198 -0.001779 0.002402 -0.003488 0.003026 -0.009269

5 rows × 27 columns

In [57]:
return_vector = return_data.T.as_matrix()
return_vector
Out[57]:
array([[ 0.04118903, -0.05116304, -0.0130823 , ...,  0.00251657,
         0.00391845,  0.00042691],
       [ 0.00419568, -0.01182327,  0.01614789, ...,  0.01286614,
         0.01054054, -0.0115004 ],
       [ 0.0360586 ,  0.01087613,  0.03072325, ...,  0.01415005,
         0.01353921, -0.00243713],
       ..., 
       [ 0.00789244, -0.01718162,  0.00630416, ..., -0.00135379,
         0.00753125,  0.00164449],
       [ 0.01397026,  0.00059259,  0.02013622, ..., -0.00049031,
         0.01199045,  0.00174825],
       [ 0.01296731, -0.02089968,  0.00566493, ...,  0.0056572 ,
         0.005386  ,  0.00369048]])
In [58]:
plt.plot(return_vector.T, alpha=.4);
plt.xlabel('time')
plt.ylabel('returns')
Out[58]:
<matplotlib.text.Text at 0x7f58ea10b810>
In [59]:
#Functions for generating random weights
def rand_weights(n):
    k = np.random.rand(n)
    return k / sum(k)
In [60]:
def random_portfolio(returns):

    p = np.asmatrix(np.mean(returns, axis=1))
    w = np.asmatrix(rand_weights(returns.shape[0]))
    C = np.asmatrix(np.cov(returns))
    
    mu = w * p.T
    sigma = np.sqrt(w * C * w.T)
    
    # This recursion reduces outliers to keep plots pretty
    if sigma > 2:
        return random_portfolio(returns)
    return mu, sigma
In [61]:
n_portfolios = 5000
means, stds = np.column_stack([
    random_portfolio(return_vector) 
    for _ in xrange(n_portfolios)
])
In [62]:
plt.plot(stds, means, 'o', markersize=5)
plt.xlabel('Standard Deviation of Returns')
plt.ylabel('Average Return')
plt.title('Risk and Reward of Randomly Generated Portfolios')
Out[62]:
<matplotlib.text.Text at 0x7f58e9e5b390>
In [37]:
def optimal_portfolio(returns):
    n = len(returns)
    returns = np.asmatrix(returns)
    
    N = 100
    mus = [10**(5.0 * t/N - 1.0) for t in range(N)]
    
    # Convert to cvxopt matrices
    S = opt.matrix(np.cov(returns))
    pbar = opt.matrix(np.mean(returns, axis=1))
    
    # Create constraint matrices
    G = -opt.matrix(np.eye(n))   # negative n x n identity matrix
    h = opt.matrix(0.0, (n ,1))
    A = opt.matrix(1.0, (1, n))
    b = opt.matrix(1.0)
    
    # Calculate efficient frontier weights using quadratic programming
    portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] 
                  for mu in mus]
    ## CALCULATE RISKS AND RETURNS FOR FRONTIER
    returns = [blas.dot(pbar, x) for x in portfolios]
    risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]
    ## CALCULATE THE 2ND DEGREE POLYNOMIAL OF THE FRONTIER CURVE
    m1 = np.polyfit(returns, risks, 2)
    x1 = np.sqrt(m1[2] / m1[0])
    # CALCULATE THE OPTIMAL PORTFOLIO
    wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
    return np.asarray(wt), returns, risks

weights, returns, risks = optimal_portfolio(return_vector)

plt.plot(stds, means, 'o')
plt.xlabel('Standard Deviation of Returns')
plt.ylabel('Average Return')
plt.plot(risks, returns, 'y-o')

ValueErrorTraceback (most recent call last)
<ipython-input-37-cfa9cd216bb5> in <module>()
     29     return np.asarray(wt), returns, risks
     30 
---> 31 weights, returns, risks = optimal_portfolio(return_vector)
     32 
     33 plt.plot(stds, means, 'o')

<ipython-input-37-cfa9cd216bb5> in optimal_portfolio(returns)
     18     # Calculate efficient frontier weights using quadratic programming
     19     portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] 
---> 20                   for mu in mus]
     21     ## CALCULATE RISKS AND RETURNS FOR FRONTIER
     22     returns = [blas.dot(pbar, x) for x in portfolios]

/usr/local/lib/python2.7/dist-packages/cvxopt/coneprog.pyc in qp(P, q, G, h, A, b, solver, initvals)
   4483             'residual as dual infeasibility certificate': dinfres} 
   4484 
-> 4485     return coneqp(P, q, G, h, None, A,  b, initvals)

/usr/local/lib/python2.7/dist-packages/cvxopt/coneprog.pyc in coneqp(P, q, G, h, dims, A, b, initvals, kktsolver, xnewcopy, xdot, xaxpy, xscal, ynewcopy, ydot, yaxpy, yscal)
   2069         try: f = kktsolver(W)
   2070         except ArithmeticError:
-> 2071             raise ValueError("Rank(A) < p or Rank([P; A; G]) < n")
   2072 
   2073 

ValueError: Rank(A) < p or Rank([P; A; G]) < n
In [38]:
print(weights)
[[  9.99984457e-01]
 [  2.41631926e-06]
 [  9.73380276e-06]
 [  3.39323845e-06]]