Notebook
In [99]:
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.classifiers.fundamentals import Sector 
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.filters import Q1500US, Q500US
import pandas as pd
import numpy as np
import random as random
import itertools
from itertools import combinations
import sklearn
from sklearn.cluster import KMeans
def make_pipeline():
  """Build a Pipeline of fundamental factors for US financial-sector stocks.

  Returns a Pipeline whose columns are the latest market cap, enterprise
  value, sustainable growth rate, ROA, ROE, and ROIC, screened down to
  Morningstar sector code 103 (financial services).
  """
  # Restrict the universe to the financial-services sector (code 103).
  sector_filter = Sector()
  financial_sector_filter = sector_filter.eq(103)

  # Latest reported value of each fundamental factor.
  market_cap = Fundamentals.market_cap.latest

  enterprise_value = Fundamentals.enterprise_value.latest

  sustain_growth = Fundamentals.sustainable_growth_rate.latest

  ROA = Fundamentals.roa.latest

  ROE = Fundamentals.roe.latest

  ROIC = Fundamentals.roic.latest

  return Pipeline(
      columns={
          'enterprise value': enterprise_value,
          'market_cap': market_cap,
          'sustain growth': sustain_growth,
          'ROA' : ROA,
          'ROE' : ROE,
          'ROIC' : ROIC
    }, screen = financial_sector_filter

  )
# Run the pipeline for a single trading day and drop any security with a
# missing factor value, so every row is complete for clustering.
result = run_pipeline(make_pipeline(), '2015-05-05', '2015-05-05').dropna(axis=0)
result.head(5)
Out[99]:
ROA ROE ROIC enterprise value market_cap sustain growth
2015-05-05 00:00:00+00:00 Equity(21 [AAME]) 0.003682 0.011316 0.010876 9.892274e+07 8.150474e+07 0.0346
Equity(66 [AB]) 0.028198 0.028258 0.028258 3.247275e+09 3.247275e+09 0.0038
Equity(157 [AEG]) -0.000153 -0.002354 0.000615 2.605216e+10 2.124709e+10 0.0005
Equity(185 [AFL]) 0.005498 0.035853 0.029553 2.930033e+10 2.756033e+10 0.1282
Equity(192 [ATAX]) 0.001303 0.003155 0.007914 5.693063e+08 3.344038e+08 -0.0613
In [100]:
# Convert the factor DataFrame to a plain numpy array, the input format
# expected by sklearn's KMeans. Column order follows the DataFrame:
# ROA, ROE, ROIC, enterprise value, market_cap, sustain growth.
result_array = result.values #change from DataFrame to array to use k-means library 
In [101]:
result_array
Out[101]:
array([[  3.68200000e-03,   1.13160000e-02,   1.08760000e-02,
          9.89227430e+07,   8.15047430e+07,   3.46000000e-02],
       [  2.81980000e-02,   2.82580000e-02,   2.82580000e-02,
          3.24727510e+09,   3.24727510e+09,   3.80000000e-03],
       [ -1.53000000e-04,  -2.35400000e-03,   6.15000000e-04,
          2.60521645e+10,   2.12470910e+10,   5.00000000e-04],
       ..., 
       [  1.49100000e-03,   1.01940000e-02,   9.11800000e-03,
          2.16061994e+10,   1.43571994e+10,   4.21000000e-02],
       [  3.31100000e-03,   4.13979000e-01,   1.10787000e-01,
          6.58138244e+08,   4.31409244e+08,   1.50300000e-01],
       [  8.12000000e-04,   1.68980000e-02,   1.49610000e-02,
         -2.92384211e+10,   7.21025955e+10,   4.96000000e-02]])
In [102]:
# Partition the securities into 50 clusters by their factor values.
# random_state is fixed so the cluster assignments are reproducible on
# re-run (KMeans centroid initialization is otherwise stochastic).
# NOTE(review): the features are unscaled — enterprise value / market cap
# are ~1e8-1e11 while the ratios are ~1e-2, so the dollar columns dominate
# the Euclidean distance. Consider standardizing before fitting; confirm
# whether that was intentional.
kmeans = KMeans(n_clusters=50, random_state=0).fit(result_array) #fit into 50 clusters
cluster_label = kmeans.labels_
print(cluster_label)
[43 21 41 41 43 48 36  0 11 43 21 43  5  7 38 12 39  9 36 43 21 43  4  0 21
  0 21 21 43 43  0 41 21  0  7 21 21 43  0 36 36 36 43  0 43  0 43 43 43  0
 21 21 43 43 43 43 43 21 43  0  0  0 43  0 31 43  0 43  0 43  0  0 43 43  6
  0 21 43 43 43  0 43  0 43 43 33 43  6 36  6 31 46 43 44 43 39 11 21 43 21
 43 31  0 43  0 43 43 16 43  0 21  0 21 43 31  0 13 43 43  0 36  0  6 28 36
 21  0  0 43 21 21 21 11 43  0 42 17  0 41 43 21 21  0 43 36  0 31 43 36 21
 43 43  0 36 21  0 43 34 21  3 21  0 43 36  0 43 11 43 43 43 43 43 43 43 36
 43 43 43 21 43 43  0 43 21 21  0 43 43 44 43 43  0 44 43 43  0 43 43 43 43
 43 43 43  9 43 43 25  0 21 43 43 43 43 21  6 21 43 21 36 21 36 14  0 44 43
 31 43 43  0 43 43 43 43  0 43 43  0 43 43 43 14 43 43 43 43  0 43 43 43 31
 31  0  0 20 43 43 10  2 21 21 43 43 43 26 43 43 43 43 43 43 43 43  0 43 43
 43 21  0 43 21 43 21 21 43  0 43 43 43 43 43 43 43 21 43 43 43 43 43  0 21
 43 43 21 21 43 14 43 43 21 21 21 43 43  8 36 43 40 43  0  0 43  0 43 43 43
 43 43 43 43 20 31 40 43  0 43  0 43 21 43 43 40 43  0 43 43 43 43  6 43 43
 43 43 43 43 43 43 29 43 43 43 43 43 43  0 36 43 28 43 43 43 37 43 25 31 47
 21 43 43 43 43 43 42 20 43  6  2  0 43 43  0 43 43 43  0 43 43 43  0 43 43
 26 43 44 21  0 43  0  0  0 11 43 22 30 43 21 43 43 43 21 33 43 43 43  0  0
 32 49 43 43 21  0 43 43 43 13  0 21 21 43  0 21 43 43 43 43 43 21  0 43 36
 43 43 43 43 43 43 21 43 21 43 43 43 43 43 43 36 43 43  0 43 43 43 36 43  0
  0 43 43 44 43 43 43 41 43 43 43 21 41 43 43  0 43 43 43 43 43 43 43 43 43
 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43  0 43
 27 43  0 43 21 43 43  0 43  6 43 43 43  0 43 35  0 21 43 43 43  0 43 21  0
 43 43  0 21 16 43 11 28 43 43 21  0 43 24 43  6 43 43 43  0 43 43 43  1 43
  0 43 43  0 43 45 43 43  0 43 20 43  6 43 21 43  0 43  0  0 43 21 21 11 43
 43 43 43 19  0 21 43  6 43 43 43 43 21 43 16 43  0 43 43 43 43 43 43 43 43
 43 13  0 23 44 16 43 43  6 43 43 43 43 43 43 43 43 18  6 43  0  0  0 43 43
 43 43 43 43 43  0 43 45 43  0  0 21 15 43 43 43 43 43  6 21 43  6 31 43 24]
In [103]:
# Prepend each security's cluster ID as the first column of the factor
# matrix, then view the combined array as a DataFrame for inspection.
labels_column = np.asarray(cluster_label).reshape((-1, 1))
result_array = np.hstack((labels_column, result_array))
pd.DataFrame(result_array)
Out[103]:
0 1 2 3 4 5 6
0 43.0 0.003682 0.011316 0.010876 9.892274e+07 8.150474e+07 0.0346
1 21.0 0.028198 0.028258 0.028258 3.247275e+09 3.247275e+09 0.0038
2 41.0 -0.000153 -0.002354 0.000615 2.605216e+10 2.124709e+10 0.0005
3 41.0 0.005498 0.035853 0.029553 2.930033e+10 2.756033e+10 0.1282
4 43.0 0.001303 0.003155 0.007914 5.693063e+08 3.344038e+08 -0.0613
5 48.0 0.001256 0.006079 0.006394 9.809177e+10 7.987577e+10 0.0657
6 36.0 0.002219 0.006871 0.007905 1.001021e+10 7.951205e+09 0.0155
7 0.0 0.002943 0.015669 0.015292 2.160424e+09 2.692702e+09 0.0381
8 11.0 0.011279 0.050711 0.031012 3.296227e+10 2.783527e+10 0.1545
9 43.0 0.002878 0.031764 0.027686 3.718901e+08 3.277641e+08 0.0555
10 21.0 0.001672 0.016197 0.008543 5.591413e+09 2.870463e+09 0.0452
11 43.0 0.001125 0.013168 0.011884 6.035022e+07 5.054822e+07 0.0540
12 5.0 0.009720 0.071760 0.022289 1.130175e+11 7.951649e+10 0.2342
13 7.0 0.001401 0.013161 0.009438 2.960945e+11 1.726545e+11 0.0249
14 38.0 0.002320 0.030784 0.012423 1.141985e+11 6.226774e+10 0.0281
15 12.0 0.000335 0.007733 0.005713 -1.413665e+11 6.577016e+10 -0.0080
16 39.0 0.036751 0.051546 0.043192 2.314759e+10 3.241869e+10 0.1801
17 9.0 0.000542 0.005744 0.004748 -5.232574e+10 4.820926e+10 0.0485
18 36.0 0.005054 0.023812 0.018896 8.078365e+09 6.191140e+09 0.1325
19 43.0 0.003203 0.032778 0.028207 7.119604e+07 7.459441e+07 0.0565
20 21.0 0.003610 0.031137 0.009223 4.362786e+09 1.251738e+09 0.0618
21 43.0 0.003224 0.028571 0.015785 5.890345e+08 5.243335e+08 0.0740
22 4.0 0.001990 0.035903 0.061448 -6.013790e+10 6.717920e+10 0.0847
23 0.0 0.002836 0.039845 0.039392 2.652283e+09 2.633561e+09 0.0835
24 21.0 0.002099 0.018664 0.011310 4.493447e+09 4.487532e+09 0.0564
25 0.0 0.002171 0.017820 0.019889 2.083382e+09 2.335786e+09 0.0594
26 21.0 0.001426 0.011319 0.016769 2.982209e+09 3.402502e+09 -0.0722
27 21.0 0.005857 0.013747 0.010614 5.298427e+09 4.581161e+09 0.0720
28 43.0 0.007268 0.020680 0.019692 3.028307e+08 3.504287e+08 0.0377
29 43.0 0.007268 0.020680 0.019692 2.954691e+08 3.430671e+08 0.0377
... ... ... ... ... ... ... ...
645 0.0 0.080295 0.256127 0.253757 1.007112e+09 1.462687e+09 0.3543
646 0.0 -0.037035 -0.220978 -0.065104 1.488312e+09 6.253480e+08 -0.2692
647 0.0 0.002527 0.011162 0.014759 2.097480e+09 1.834826e+09 0.1157
648 43.0 0.011437 0.026461 0.022585 1.645783e+08 1.925573e+08 0.0269
649 43.0 0.010447 0.014761 0.011446 4.890821e+08 3.144104e+08 -0.0128
650 43.0 0.001535 0.018522 0.011944 2.129486e+08 1.145556e+08 0.0850
651 43.0 -0.000535 -0.013678 -0.013569 5.850115e+08 2.747995e+08 -0.0070
652 43.0 -0.020049 -0.044814 -0.010910 4.800485e+08 2.295175e+08 -0.0450
653 43.0 -0.008102 -0.073270 0.019364 3.350251e+08 7.115513e+07 0.2228
654 43.0 0.022880 0.025814 0.025516 1.519000e+08 1.554000e+08 0.0400
655 0.0 0.000577 0.008482 0.007185 1.049446e+09 1.304946e+09 0.0794
656 43.0 -0.004846 -0.009741 -0.002760 2.622443e+08 1.340013e+08 -0.0451
657 45.0 0.008633 0.078083 0.017286 3.811092e+10 8.424202e+09 0.2725
658 43.0 0.023368 0.040033 0.026852 2.982051e+08 1.853200e+08 0.0405
659 0.0 0.002153 0.016578 0.012956 1.423575e+09 1.116596e+09 0.1296
660 0.0 0.010686 0.016491 0.014463 1.377679e+09 9.842276e+08 0.0110
661 21.0 -0.002283 -0.004234 0.002644 3.341315e+09 2.524332e+09 0.0114
662 15.0 0.002012 0.070489 0.004315 1.420094e+11 7.640381e+09 0.2120
663 43.0 0.000145 0.010984 0.007785 -3.214447e+09 3.911122e+09 0.0057
664 43.0 0.033795 0.046667 0.046667 2.904320e+07 3.436020e+07 0.0772
665 43.0 0.003705 0.041440 0.046163 6.710013e+08 9.076433e+08 0.1545
666 43.0 0.000783 0.006919 0.007528 2.241898e+08 1.327688e+08 0.0226
667 43.0 0.002124 0.019533 0.020166 1.111283e+08 2.930953e+08 0.0462
668 6.0 0.006983 0.016697 0.012237 1.503307e+10 1.310507e+10 -0.0001
669 21.0 0.131775 0.167159 0.167159 4.043032e+09 4.152231e+09 -0.0030
670 43.0 0.002415 0.019548 0.016112 2.236228e+08 1.132408e+08 0.0669
671 6.0 0.002389 0.028056 0.025297 1.647010e+10 1.114051e+10 0.1015
672 31.0 0.001491 0.010194 0.009118 2.160620e+10 1.435720e+10 0.0421
673 43.0 0.003311 0.413979 0.110787 6.581382e+08 4.314092e+08 0.1503
674 24.0 0.000812 0.016898 0.014961 -2.923842e+10 7.210260e+10 0.0496

675 rows × 7 columns