In [1]:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import matplotlib.pyplot as plt
%matplotlib inline
import sys
sys.path.insert(0,'./financial_calcs/')
import trading_calcs.standard as std_calcs
In [2]:
from sklearn import tree
X = [[0,0],[1,1]]
Y = [0,1]
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, Y)
clf.predict([[2.,2.]])
Out[2]:
array([1])

An example of creating in increasing price, and creating a VWAP to go with it.

In [22]:
dti = pd.date_range('2020-01-01',periods=200,freq='D')
inc_price = np.linspace(20,25,100)
decr_price = np.linspace(25,20,100)
price = np.hstack((inc_price, decr_price))
s_price = pd.Series(price, index=dti, name='Price')
volume_arry = np.ones((200,))*50000.0
volume = pd.Series(volume_arry, index=dti, name='Volume')
vwap = std_calcs.vwap_calc(s_price, volume, 14)
totvwap = vwap.sum()
In [23]:
series_pct_chng = s_price.pct_change(1)
In [24]:
plt.plot(series_pct_chng)
Out[24]:
[<matplotlib.lines.Line2D at 0x7f61b6bb0a58>]

Experimental fake values

In [96]:
price = np.array([25, 24, 23, 24, 25, 24, 23, 24, 24.5, 25.5, 26.0, 26.5, 27.0, 27.5])
dti = pd.date_range('2020-01-01',periods=len(price),freq='D')
vol = np.array([30, 50, 20, 80, 30, 90, 10, 80, 90, 80, 85, 90, 110, 95])
s_price = pd.Series(price, index=dti, name='Price')
s_vol = pd.Series(vol, index=dti, name='Volume')
s_vwap = std_calcs.vwap_calc(s_price,s_vol,2)
s_vwap
Out[96]:
2020-01-01          NaN
2020-01-02    24.375000
2020-01-03    23.714286
2020-01-04    23.800000
2020-01-05    24.272727
2020-01-06    24.250000
2020-01-07    23.900000
2020-01-08    23.888889
2020-01-09    24.264706
2020-01-10    24.970588
2020-01-11    25.757576
2020-01-12    26.257143
2020-01-13    26.775000
2020-01-14    27.231707
Freq: D, Name: MVWAP, dtype: float64
In [71]:
plt.plot(s_price.pct_change(1))
Out[71]:
[<matplotlib.lines.Line2D at 0x7f61b49f9438>]
In [72]:
s_price.plot()
s_vwap.plot()
Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f61b49c90f0>

Get the percent change from price over 3 days, then shift back by three days. Each index now indicates what the percent return will be 3 days into the future.

In [86]:
future_price = s_price.pct_change(3).shift(-3)
future_price=future_price.fillna(0)
future_price
Out[86]:
2020-01-01   -0.040000
2020-01-02    0.041667
2020-01-03    0.043478
2020-01-04   -0.041667
2020-01-05   -0.040000
2020-01-06    0.020833
2020-01-07    0.108696
2020-01-08    0.083333
2020-01-09    0.081633
2020-01-10    0.058824
2020-01-11    0.057692
2020-01-12    0.000000
2020-01-13    0.000000
2020-01-14    0.000000
Freq: D, Name: Price, dtype: float64

This is where positions are calculated. If the future price will be positive then buy (1).

In [84]:
pos = np.where(future_price > 0, 1,0)
pos
Out[84]:
array([0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0])

This is where the sell position is decided, when future price will be negative, sell (-1).

In [106]:
pos1 = np.where(future_price <= 0, -1, pos)
pos1
Out[106]:
array([-1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1, -1, -1, -1])
In [107]:
s_vwap = s_vwap.fillna(method='backfill')
In [117]:
X = np.column_stack((s_price.values,s_vwap.values, s_vol))
X
Out[117]:
array([[ 25.        ,  24.375     ,  30.        ],
       [ 24.        ,  24.375     ,  50.        ],
       [ 23.        ,  23.71428571,  20.        ],
       [ 24.        ,  23.8       ,  80.        ],
       [ 25.        ,  24.27272727,  30.        ],
       [ 24.        ,  24.25      ,  90.        ],
       [ 23.        ,  23.9       ,  10.        ],
       [ 24.        ,  23.88888889,  80.        ],
       [ 24.5       ,  24.26470588,  90.        ],
       [ 25.5       ,  24.97058824,  80.        ],
       [ 26.        ,  25.75757576,  85.        ],
       [ 26.5       ,  26.25714286,  90.        ],
       [ 27.        ,  26.775     , 110.        ],
       [ 27.5       ,  27.23170732,  95.        ]])
In [116]:
from sklearn import tree
Y = pos1
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, Y)
clf.predict([[26,24.3]])
Out[116]:
array([-1])
In [110]:
tree.plot_tree(clf)
Out[110]:
[Text(223.20000000000002, 195.696, 'X[1] <= 26.007\ngini = 0.49\nsamples = 14\nvalue = [6, 8]'),
 Text(186.0, 152.208, 'X[0] <= 24.75\ngini = 0.397\nsamples = 11\nvalue = [3, 8]'),
 Text(111.60000000000001, 108.72, 'X[1] <= 23.844\ngini = 0.245\nsamples = 7\nvalue = [1, 6]'),
 Text(74.4, 65.232, 'X[1] <= 23.757\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'),
 Text(37.2, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
 Text(111.60000000000001, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'),
 Text(148.8, 65.232, 'gini = 0.0\nsamples = 5\nvalue = [0, 5]'),
 Text(260.40000000000003, 108.72, 'X[1] <= 24.673\ngini = 0.5\nsamples = 4\nvalue = [2, 2]'),
 Text(223.20000000000002, 65.232, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'),
 Text(297.6, 65.232, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'),
 Text(260.40000000000003, 152.208, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]')]

Extra VWAP Stuff that might be useful later

The following represents the position, that is where to buy.

In [73]:
pos = np.where((s_price < (1-0.01)*s_vwap),1,0)
pos
Out[73]:
array([0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0])

The following makes the position last for 3 days.

In [74]:
def position_expander(pos,duration):
    i = 0
    while(i<len(pos)):
        if pos[i]:
            #print(pos[i+1:i+6])
            pos[i+1:i+duration] = 1
            i += duration
        else:
            i += 1
    return pos

pos = position_expander(pos,3)
pos
Out[74]:
array([0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])
In [75]:
pct_chng = s_price.pct_change(1)
pct_chng
Out[75]:
2020-01-01         NaN
2020-01-02   -0.040000
2020-01-03   -0.041667
2020-01-04    0.043478
2020-01-05    0.041667
2020-01-06   -0.040000
2020-01-07   -0.041667
2020-01-08    0.043478
2020-01-09    0.020833
2020-01-10    0.040816
2020-01-11    0.019608
2020-01-12    0.019231
2020-01-13    0.018868
2020-01-14    0.018519
Freq: D, Name: Price, dtype: float64
In [76]:
system = pos*pct_chng
system
Out[76]:
2020-01-01         NaN
2020-01-02   -0.040000
2020-01-03   -0.041667
2020-01-04    0.043478
2020-01-05    0.000000
2020-01-06   -0.040000
2020-01-07   -0.041667
2020-01-08    0.043478
2020-01-09    0.000000
2020-01-10    0.000000
2020-01-11    0.000000
2020-01-12    0.000000
2020-01-13    0.000000
2020-01-14    0.000000
Freq: D, Name: Price, dtype: float64
In [77]:
plt.plot(100.0*system.cumsum())
Out[77]:
[<matplotlib.lines.Line2D at 0x7f61b49eab38>]