Notebook
In [2]:
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
In [3]:
# Load pricing data
start = '2003-01-01'
end = '2018-01-01'
x = get_pricing('SPY', fields='price', start_date=start, end_date=end)
In [6]:
x = x.pct_change()
In [9]:
x_lag = np.empty(len(x), dtype = float)
x_curr = np.empty(len(x), dtype = float)

for i in range(1, len(x)):

    x_lag[i-1] = x[i]
    x_curr[i] = x[i] 
In [10]:
plt.scatter(x_lag[1:], x_curr[1:])
Out[10]:
<matplotlib.collections.PathCollection at 0x7fac1ec86c50>
In [11]:
from sklearn import linear_model
In [12]:
import statsmodels.api as sm

X_ = x_lag
y_ = x_curr

model = sm.OLS(y_, X_).fit()
predictions = model.predict(X_)

model.summary()
Out[12]:
<caption>OLS Regression Results</caption>
Dep. Variable: y R-squared: 0.008
Model: OLS Adj. R-squared: 0.008
Method: Least Squares F-statistic: 30.81
Date: Thu, 30 Aug 2018 Prob (F-statistic): 3.04e-08
Time: 09:24:29 Log-Likelihood: 11567.
No. Observations: 3776 AIC: -2.313e+04
Df Residuals: 3775 BIC: -2.313e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
x1 -0.0900 0.016 -5.551 0.000 -0.122 -0.058
Omnibus: 710.927 Durbin-Watson: 2.011
Prob(Omnibus): 0.000 Jarque-Bera (JB): 19691.710
Skew: -0.032 Prob(JB): 0.00
Kurtosis: 14.187 Cond. No. 1.00
In [13]:
plt.plot(np.unique(x_lag[1:]), np.poly1d(np.polyfit(x_lag[1:], x_curr[1:], 1))(np.unique(x_curr)))
plt.scatter(x_lag[1:], x_curr[1:])
Out[13]:
<matplotlib.collections.PathCollection at 0x7fac06d22550>