Notebook
In [845]:
import numpy as np
import scipy as sp

In [846]:
# Gap threshold (in rows) between two change points; also the offset used
# when a synthetic change point is inserted into an over-long gap.
days_in_quarter = 3
# Number of change points kept for each column.
num_of_quarters = 4

In [847]:
# Toy input: 12 rows by 3 columns, one column per security. Each column
# keeps a value for a run of rows and then steps to a new one; NaN marks the
# rows where the middle column has no value yet.
x = np.array([
    [1, np.nan, 3],   # row 0
    [1, np.nan, 3],   # row 1
    [1, np.nan, 3],   # row 2
    [1, 1, 4],        # row 3
    [1, 1, 4],        # row 4
    [1, 1, 4],        # row 5
    [1, 7, 5],        # row 6
    [2, 7, 5],        # row 7
    [2, 7, 5],        # row 8
    [2, 7, 6],        # row 9
    [2, 7, 6],        # row 10
    [4, 7, 6],        # row 11
])

In [848]:
# Four constant-valued 1-D segments of different lengths: the first is all
# NaN, the others each repeat a single value.
q1 = np.full(68, np.nan)
q2 = np.full(59, 0.40)
q3 = np.full(67, 0.40)
q4 = np.full(63, 0.30)

# Disabled alternative input: one long 1-D series made of the segments above.
#x=np.concatenate([q1, q2, q3, q4])
#days_in_quarter = 70
#x

In [849]:
# Row count and column count of the input (one column per security).
window_length, num_of_securities = x.shape[0], x.shape[1]

# NaNs are replaced by 0 so they compare like ordinary values. A copy of the
# first row, shifted by +1, is then placed in front, which guarantees that
# every column shows a change between rows 0 and 1 of `y`.
x_no_nan = np.nan_to_num(x)
y = np.insert(x_no_nan, 0, x_no_nan[0] + 1, axis=0)

In [850]:
# Switch to one row per security.
# NOTE: `y` is rebound to its own transpose, so running this cell a second
# time without re-running the previous one flips the orientation back.
y = y.T
# Step between consecutive entries of each row: d[:, k] = y[:, k + 1] - y[:, k].
d = np.diff(y, axis=1)
d

Out[850]:
array([[-1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  2.],
[-1.,  0.,  0.,  1.,  0.,  0.,  6.,  0.,  0.,  0.,  0.,  0.],
[-1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])
In [851]:
# Coordinates of every non-zero step in `d`, returned as a pair
# (row indices, column indices) listed in row-major order.
i = np.nonzero(d != 0)
i

Out[851]:
(array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2]),
array([ 0,  7, 11,  0,  3,  6,  0,  3,  6,  9]))
In [852]:
# Number of non-zero steps found in each row of `d`: i[0] holds the row index
# of every non-zero entry, so counting occurrences of each index gives the
# per-row total.
f = np.bincount(i[0])
f

Out[852]:
array([3, 3, 4])
In [853]:
# Offsets at which each row's block of entries starts inside the flat index
# arrays (the first block implicitly starts at 0). The last count is left out
# because nothing starts after the final block.
sidxs = np.cumsum(f[:-1])
sidxs

Out[853]:
array([3, 6])
In [854]:
# Cut the flat array of column positions (i[1]) at the offsets in `sidxs`,
# giving a list with one array of change positions per row of `d`.
sa = np.split(i[1], sidxs)
sa

Out[854]:
[array([ 0,  7, 11]), array([0, 3, 6]), array([0, 3, 6, 9])]
In [855]:
# Bring every entry of `sa` (an array of change positions) to exactly
# `num_of_quarters` values and stack them, one row per entry, into `sa2`.
# Rows are collected in a list and stacked once at the end: calling np.vstack
# inside the loop re-copies the whole accumulated array on every iteration.
padded_rows = []
for a in sa:
    if len(a) >= num_of_quarters:
        # Enough change points: keep only the last `num_of_quarters` of them.
        padded_rows.append(a[-num_of_quarters:])
    else:
        n = len(a)
        missing = num_of_quarters - n
        # Fill up to the required length by repeating the largest position
        # at the end of the array.
        a = np.pad(a, (0, missing), 'maximum')
        # Wherever two neighbouring positions are more than `days_in_quarter`
        # apart, insert a synthetic position `days_in_quarter` after the
        # earlier one. At most `missing` gaps are used, and each gap receives
        # a single inserted position.
        d_sa2 = np.diff(a)
        i_sa2 = np.where(d_sa2 > days_in_quarter)[0]
        i_sa2 = i_sa2[0:missing]
        # Each insertion lengthens the array, so trim it back to
        # `num_of_quarters`; the entries cut off are trailing ones.
        a = np.insert(a, i_sa2 + 1, a[i_sa2] + days_in_quarter)[:num_of_quarters]
        padded_rows.append(a)

# The leading empty (0, num_of_quarters) block keeps the result 2-D with an
# integer dtype even when `sa` is empty.
sa2 = np.vstack([np.empty([0, num_of_quarters], dtype=int)] + padded_rows)

sa2

Out[855]:
array([[ 0,  3,  7, 11],
[ 0,  3,  6,  6],
[ 0,  3,  6,  9]])
In [856]:
# Scratch check: slicing the last `num_of_quarters` items from an array that
# is shorter than that returns the whole array rather than raising.
# Uses the configured constant instead of a literal so the check stays in
# step with the slicing it illustrates.
a = sa[1]
a[-num_of_quarters:]

Out[856]:
array([0, 3, 6])
In [857]:
# Flatten the position table row by row.
sa2r = np.ravel(sa2)

# Row number that goes with every flattened position: row k of `i0` is
# filled with k, so `i0r` repeats each row number `num_of_quarters` times.
i0 = np.repeat(range(len(sa2)), num_of_quarters).reshape(len(sa2), num_of_quarters)
i0r = np.ravel(i0)

# print() with a single argument produces the same output on Python 2 and 3;
# the bare print statement is a syntax error on Python 3.
print(i0r)
print(sa2r)

[0 0 0 0 1 1 1 1 2 2 2 2]
[ 0  3  7 11  0  3  6  6  0  3  6  9]

In [858]:
# Look up the original value at every (position, column) pair. Indexing `x`
# directly with the pair swapped is the same as indexing its transpose.
xa = x[sa2r, i0r]
xa

Out[858]:
array([  1.,   1.,   2.,   4.,  nan,   1.,   7.,   7.,   3.,   4.,   5.,
6.])
In [859]:
# Regroup the flat values into one row per security, then transpose so that
# each column is a security and each row is one of its selected positions.
# The row length comes from `num_of_quarters` rather than a literal 4, so the
# reshape keeps working when that setting is changed.
quartely_data = np.transpose(np.reshape(xa, (num_of_securities, num_of_quarters)))
quartely_data

Out[859]:
array([[  1.,  nan,   3.],
[  1.,   1.,   4.],
[  2.,   7.,   5.],
[  4.,   7.,   6.]])
In [860]:
# Column totals. A NaN anywhere in a column makes that column's total NaN.
quartely_data.sum(axis=0)

Out[860]:
array([  8.,  nan,  18.])
In [861]:
# Spacing between consecutive entries of two hand-entered position lists.
# print() with a single argument produces the same output on Python 2 and 3;
# the bare print statement is a syntax error on Python 3.
first_gaps = np.diff((0, 29, 92, 155, 220))
second_gaps = np.diff((0, 42, 112, 169, 232))
print(first_gaps)
print(second_gaps)

[29 63 63 65]
[42 70 57 63]