Notebook
In [111]:
from numpy import isnan, matrix, array, zeros, empty, sqrt, round, ones, dot, append, mean, cov, transpose, linspace
import numpy as np
import talib
import pandas as pd
import scipy.optimize
import operator
from pytz import timezone
from zipline.utils.tradingcalendar import get_early_closes
import statsmodels.api as sm
from statsmodels import regression
import statsmodels.tsa.stattools as ts
import matplotlib.pyplot as plt
In [112]:
# Start and end of the price-history window; both are passed to get_pricing
# in a later cell.
start_date = '2005-1-1'
end_date = '2012-1-1'
def ret_index(prices):
    '''
    Cumulative simple return per dollar invested in each instrument.

    Period-over-period percentage changes are compounded into a
    growth-of-one-dollar index, and 1 is then subtracted, so the value
    returned is the gain (or loss) relative to the first observation
    rather than the index level itself. The first entry is NaN because
    no percentage change exists for it.
    '''
    growth_of_dollar = prices.pct_change().add(1).cumprod()
    return growth_of_dollar.sub(1)
In [113]:
def load_symbols():
    """Return the ticker universe as one flat tuple.

    Tickers are grouped by asset class and concatenated in the order
    equities, fixed income, alternatives.
    """
    equity_etfs = (
        'DIA',    # Dow
        'SPY',    # SP500
    )
    bond_etfs = (
        'LQD',    # Corporate bond
        'HYG',    # High yield
    )
    alternative_etfs = (
        'USO',    # Oil
        'GLD',    # Gold
        'VNQ',    # US Real Estate
        'RWX',    # Dow Jones Global ex-U.S. Select Real Estate Securities Index
        'UNG',    # Natural gas
        'DBA',    # Agriculture
    )

    universe = ()
    for group in (equity_etfs, bond_etfs, alternative_etfs):
        universe += group
    return universe
In [114]:
# Build the ticker universe once and echo it as the cell output.
secs = load_symbols()
secs
Out[114]:
('DIA', 'SPY', 'LQD', 'HYG', 'USO', 'GLD', 'VNQ', 'RWX', 'UNG', 'DBA')
In [115]:
# calculate the slope for these assets
# NOTE(review): get_pricing is not imported in this notebook; presumably it is
# a built-in of the hosting research environment -- confirm.
prices = get_pricing(secs, start_date, end_date,fields='price') # get price data
X = range(len(prices)) # explanatory variable: the row position 0..len(prices)-1
Y = prices.values # dependent variable: the raw price values
# Regressor matrix handed to linreg in a later cell.
A=sm.add_constant(X)
# Standard deviation of the prices; not referenced again in the cells shown here.
sd = prices.std()
Linear regression model with a complete summary. We won't need all of the output, only the two fitted parameters: a (the intercept) and b (the slope).
In [116]:
def linreg(X,Y):
    """Fit Y = a + b*X by ordinary least squares, plot the fit, return the summary.

    Parameters
    ----------
    X : array-like
        Explanatory variable. ``sm.add_constant`` is applied to it before
        fitting, and column 1 of the resulting matrix is taken as the
        regressor for plotting.
    Y : array-like
        Dependent variable.

    Returns
    -------
    The object produced by ``model.summary()`` for the fitted OLS model.

    Side effects
    ------------
    Prints the intercept and slope, and draws a scatter plot of the data
    with the fitted line through the pyplot interface.
    """
    # Running the linear regression
    X = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, X).fit()

    # np.asarray gives plain positional access whether params comes back as
    # an ndarray or as a labelled pandas Series.
    params = np.asarray(model.params)
    a = params[0]  # intercept
    b = params[1]  # slope

    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('intercept: %s' % a)
    print('slope %s' % b)
    X = X[:, 1]

    # Return summary of the regression and plot results
    X2 = np.linspace(X.min(), X.max(), 100)
    Y_hat = X2 * b + a
    plt.scatter(X, Y, alpha=0.3) # Plot the raw data
    plt.plot(X2, Y_hat, 'r', alpha=0.9)  # Add the regression line, colored in red
    plt.xlabel('X Value')
    plt.ylabel('Y Value')
    return model.summary()
In [117]:
# Regress the SPY price series on the regressor matrix A and display the OLS
# summary. Note that this rebinds the notebook-level name Y.
# NOTE(review): symbols is not imported in this notebook; presumably a
# built-in of the hosting research environment -- confirm.
Y = prices[symbols('SPY')].values

results = linreg(A,Y)
results
intercept: 130.904374088
slope -0.00947200289767
Out[117]:
<caption>OLS Regression Results</caption>
Dep. Variable: y R-squared: 0.075
Model: OLS Adj. R-squared: 0.074
Method: Least Squares F-statistic: 141.8
Date: Thu, 14 Jan 2016 Prob (F-statistic): 1.64e-31
Time: 17:10:55 Log-Likelihood: -7495.0
No. Observations: 1763 AIC: 1.499e+04
Df Residuals: 1761 BIC: 1.501e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
const 130.9044 0.809 161.776 0.000 129.317 132.491
x1 -0.0095 0.001 -11.910 0.000 -0.011 -0.008
Omnibus: 79.412 Durbin-Watson: 0.008
Prob(Omnibus): 0.000 Jarque-Bera (JB): 89.747
Skew: -0.553 Prob(JB): 3.25e-20
Kurtosis: 2.985 Cond. No. 2.03e+03
In [ ]:
 
In [118]:
#
# Window helper for a rolling regression: fit a straight line to one window
# of values and reduce the fit to a single number, slope / intercept, used
# as a momentum indicator. The window length is chosen by the caller.
#
def linear_regression(df):
    """Return slope / intercept of an OLS line fitted to ``df``.

    ``df`` is regressed on its own positions 0..len(df)-1 plus a constant
    term; the fitted parameters come back as (intercept, slope).
    """
    design = sm.add_constant(range(len(df)))
    fit = regression.linear_model.OLS(df, design).fit()
    intercept, slope = fit.params
    return slope / intercept
In [ ]:
 
In [119]:
# SPY price column, kept as the pandas object (no .values) because the
# following cells call pandas rolling and plotting functions on it.
spy = prices[symbols('SPY')]
In [120]:
# Apply linear_regression to every trailing 252-row window of SPY prices.
# The callback must be a name this notebook defines, so that the cell runs
# on a fresh kernel.
lin_reg_momentm_indicator = pd.rolling_apply(spy,252,linear_regression)
In [121]:
# Overlay SPY's cumulative return with the rolling regression indicator.
# NOTE(review): the two series may be on very different scales; consider a
# secondary axis if the indicator appears flat.
ret_index(spy).plot()
lin_reg_momentm_indicator.plot()
Out[121]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9b282d0a90>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: