from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
# Load pricing data
# NOTE(review): get_pricing is neither defined nor imported in this file;
# presumably it is supplied by the hosting research environment -- confirm.
start = '2003-01-01'
end = '2018-01-01'
x = get_pricing('SPY', fields='price', start_date=start, end_date=end)

# Convert prices to period-over-period returns. The first entry is NaN
# because there is no earlier price to compare it with.
x = x.pct_change()

# Build aligned (previous return, current return) pairs:
#     x_curr[i] = x[i]    and    x_lag[i] = x[i - 1]    for i >= 1.
# Both arrays keep len(x) entries. Slots without a defined value hold NaN
# instead of uninitialised memory, so they can be masked out reliably below.
# assumes x is one-dimensional (a single price series) -- TODO confirm
returns = np.asarray(x, dtype=float)
x_lag = np.full(len(returns), np.nan)
x_curr = np.full(len(returns), np.nan)
x_lag[1:] = returns[:-1]
x_curr[1:] = returns[1:]

# Keep only the rows where both members of the pair are finite. This drops
# the padding slot, the leading NaN produced by pct_change(), and any gaps
# in the price data.
valid = np.isfinite(x_lag) & np.isfinite(x_curr)
X_ = x_lag[valid]
y_ = x_curr[valid]
if len(X_) < 2:
    raise ValueError('need at least two valid (lagged, current) return pairs')

plt.scatter(X_, y_)

from sklearn import linear_model
import statsmodels.api as sm

# Ordinary least squares of the current return on the lagged return.
# NOTE(review): no constant column is added, so this model is fitted without
# an intercept, while the polyfit line plotted below does have one -- confirm
# that the difference is intended.
model = sm.OLS(y_, X_).fit()
predictions = model.predict(X_)
model.summary()

# Degree-1 least-squares line, evaluated on the sorted unique lagged returns
# so that the x and y values passed to plot() correspond point for point.
fitted_line = np.poly1d(np.polyfit(X_, y_, 1))
lag_grid = np.unique(X_)
plt.plot(lag_grid, fitted_line(lag_grid))
plt.scatter(X_, y_)