import numpy as np
import pandas as pd




#Sci-Kit Learn Classification Trees
import sklearn as skl
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import LogisticRegression


from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split

from sklearn import tree




import matplotlib
from matplotlib import pyplot as plt

# Colab-only step: mount Google Drive at /content/gdrive so files stored
# there are reachable through ordinary filesystem paths.
# The call is interactive (it asks for an authorization code).
from google.colab import drive
drive.mount('/content/gdrive');

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive

I was able to pull all the data off of the Bank of Canada website found here.

# Read the raw yield export from the mounted Drive folder. No dtype or
# date parsing is requested here; columns arrive exactly as pandas infers
# them from the CSV.
csv_path = '/content/gdrive/My Drive/Canadian Bond Yields/April2010-April2018.csv'
raw_dataset = pd.read_csv(csv_path)

print('Successfully Imported :')
# Preview the first five rows (rendered by the notebook as the cell result).
raw_dataset.head(5)

Successfully Imported :

#Reserved.

Let's try simply predicting the direction of the next day's price action on a single 2yr bond.

This shouldn't return predictions that are accurate more than 65% of the time. Otherwise the Canadian Bond Market really is a joke.

Below are the visualizations of the yield curve as well as the individual return of the 2yr(?) bond. Not to scale.

Things look miserable here in Canada. Bonds are selling like mad. 2yr down to +0.25%. Yikes.

# Build the feature matrix and the up/down label from the raw export, then
# split both into a leading 85% training window and a trailing 15% test window.

# Yield series used as model features; V39051 is also the series being labelled.
yield_columns = ['V39051', 'V39052', 'V39053', 'V39054', 'V39055']

# Work on an explicit copy so later column assignments never touch raw_dataset.
input_data = raw_dataset[['Date'] + yield_columns].copy()

# Reverse the row order and keep the old row labels in an 'index' column.
# Presumably the export is newest-first, which makes this oldest-first --
# confirm against the CSV.
input_data = input_data.sort_index(ascending=False, axis=0).reset_index()

# Non-numeric cells (e.g. holiday markers) become NaN and are dropped below.
for column in yield_columns:
    input_data[column] = pd.to_numeric(input_data[column], errors='coerce')

input_data = input_data.dropna()

# Label: -1 when the previous row's V39051 is higher than this row's,
# otherwise +1 (this includes "unchanged" and the first row, which has no
# previous value). Built as a standalone Series instead of the former
# chained `input_data['Date'].loc[mask] = ...`, which raised
# SettingWithCopyWarning and is not guaranteed to write through.
# The Series keeps the name 'Date' so printed output is unchanged.
#
# NOTE(review): the label describes the move from the previous row INTO the
# current row, while the features are the current row's own yields. If the
# goal is to forecast the *next* move, the target should probably be built
# with shift(-1) instead -- confirm before trusting the accuracy figures.
previous_yield = input_data['V39051'].shift(1)
yield_fell = previous_yield > input_data['V39051']
validation = yield_fell.map({True: -1, False: 1}).rename('Date')

# Only the five yield columns remain as features.
input_data = input_data.drop(columns=['Date', 'index'])

# Ordered (unshuffled) split: first 85% of rows train, the rest test.
split = int(0.85 * (len(input_data)))

x_train, x_test = input_data[:split], input_data[split:]
y_train, y_test = validation[:split], validation[split:]


print(x_train[30:45])


print(y_train[30:45])


# Figure 0: all five yield series, training rows followed by test rows.
plt.figure(0)
plt.plot(x_train)
plt.plot(x_test)


# Figure 1: cumulative sum of (1 - current / previous) * 100 for V39051
# over the whole cleaned dataset.
r_class = (1 - (input_data.V39051 / input_data.V39051.shift(1))) * 100
plt.figure(1)
plt.plot(r_class.cumsum())

    V39051  V39052  V39053  V39054  V39055
31    1.59    1.97    2.52    2.72    3.26
32    1.62    2.00    2.55    2.75    3.25
33    1.78    2.17    2.71    2.90    3.37
34    1.72    2.10    2.64    2.84    3.30
35    1.84    2.22    2.75    2.93    3.36
36    1.73    2.11    2.61    2.81    3.29
37    1.76    2.15    2.68    2.89    3.38
38    1.76    2.14    2.69    2.90    3.39
39    1.62    1.99    2.55    2.77    3.28
40    1.63    2.01    2.58    2.80    3.30
41    1.67    2.05    2.61    2.83    3.32
42    1.72    2.10    2.65    2.87    3.35
43    1.82    2.20    2.76    2.97    3.43
44    1.78    2.17    2.73    2.94    3.41
45    1.81    2.20    2.76    2.97    3.44
31   -1
32    1
33    1
34   -1
35    1
36   -1
37    1
38    1
39   -1
40    1
41    1
42    1
43    1
44   -1
45    1
Name: Date, dtype: int64

/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:671: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)

[<matplotlib.lines.Line2D at 0x7fa27761b7f0>]

# Fit three classifiers plus a voting ensemble on the training window, report
# test accuracy, and plot a simple "trade on the prediction" return curve.

dtc = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=10, min_samples_split=110, max_features='log2')
dtc.fit(x_train, y_train)

rfc = RandomForestClassifier(n_estimators = 10000, criterion='gini', max_depth=10, min_samples_split=90, max_features='log2')
rfc.fit(x_train, y_train)

pac = PassiveAggressiveClassifier()
pac.fit(x_train, y_train)



print("PassiveAggressiveClassifier Accuracy:", accuracy_score(y_test, pac.predict(x_test)))
print("RandomForestClassifier Accuracy:", accuracy_score(y_test, rfc.predict(x_test)))
print("DecisionTreeClassifier Accuracy:", accuracy_score(y_test, dtc.predict(x_test)))



vc = VotingClassifier(estimators=[('rfc', rfc), ('pac', pac), ('dtc', dtc)])
vc.fit(x_train, y_train)

print("VotingClassifier Accuracy:", accuracy_score(y_test, vc.predict(x_test)))

print(x_test.dtypes)


# "class" = voting ensemble, "regress" = the single decision tree. Both are
# classifiers; the *_regress names are historical.
predict_class = vc.predict(x_test)
predict_regress = dtc.predict(x_test)

p_class = pd.DataFrame(predict_class)
p_regress = pd.DataFrame(predict_regress)

print('Here we start')
print(p_class.head(5))

# Take the single prediction column (labelled 0) as a float Series rather
# than assigning a whole DataFrame into a column.
p_class['true'] = p_class[0].astype('float64')
p_regress['true'] = p_regress[0].astype('float64')

# Row-to-next-row change of V39051 over the TEST window only, re-indexed
# 0..n-1 so it lines up row-for-row with the predictions above. The earlier
# version took this series from the full dataset, so after re-indexing the
# test predictions were multiplied by the first rows of the whole dataset
# instead of the rows they were predicted for.
# The last row has no successor and is NaN; cumsum() skips it.
test_yield = x_test.V39051.reset_index(drop=True)
next_change = (1 - (test_yield / test_yield.shift(-1))) * 100

return_class = next_change * p_class['true']
return_regress = next_change * p_regress['true']

print(return_regress.head(25))


# Figure 0: cumulative prediction-weighted change (ensemble blue, tree cyan).
plt.figure(0)
plt.plot(return_class.cumsum(), color='blue')
plt.plot(return_regress.cumsum(), color='cyan')

# Figure 1: cumulative (1 - current / previous) * 100 of V39051 over the
# test window, for comparison with the curves above.
bond_root_return = (1 - (x_test.V39051 / x_test.V39051.shift(1))) * 100
plt.figure(1)
plt.plot(bond_root_return.cumsum())

# Figures 2 and 3: the raw +1/-1 prediction sequences.
plt.figure(2)
plt.plot(p_class['true'])

plt.figure(3)
plt.plot(p_regress['true'])


# To inspect the fitted tree, call: tree.plot_tree(dtc)

PassiveAggressiveClassifier Accuracy: 0.6187290969899666
RandomForestClassifier Accuracy: 0.5719063545150501
DecisionTreeClassifier Accuracy: 0.5652173913043478
VotingClassifier Accuracy: 0.5785953177257525
V39051    float64
V39052    float64
V39053    float64
V39054    float64
V39055    float64
dtype: object
Here we start
   0
0  1
1  1
2  1
3 -1
4  1
0     2.105263
1     1.554404
2     0.515464
3    -4.301075
4    -1.639344
5     8.955224
6     1.951220
7    -1.485149
8    -2.020202
9     0.000000
10   -3.664921
11    3.045685
12   -0.510204
13   -3.157895
14   -1.063830
15   -7.428571
16   -1.744186
17    2.272727
18    2.762431
19    6.217617
20    1.530612
21    2.487562
22   -3.608247
23   -6.593407
24    2.673797
dtype: float64

[<matplotlib.lines.Line2D at 0x7fa276f3d860>]

Seems like classifiers won't produce reliable prediction models for the movement of the 2yr bond when given the previous curves.

It is interesting that it can consistently outperform a buy and hold strategy when Bonds are tanking. I wonder if it'll underperform as the Bonds get bid up.

I will leave this notebook here. I intend to use the Canadian Bond Yield Data in conjunction with some other economic data in a future endeavour. For now this was a fun way to brush up on SKLearn and make sure that the data I had procured was workable.

	Date	V39051	V39052	V39053	V39054	V39055
0	2018-03-30	Bank holiday	Bank holiday	Bank holiday	Bank holiday	Bank holiday
1	2018-03-29	1.77	1.88	1.96	2.02	2.09
2	2018-03-28	1.80	1.92	2.00	2.05	2.11
3	2018-03-27	1.83	1.94	2.02	2.08	2.14
4	2018-03-26	1.88	1.99	2.09	2.15	2.23

Good Coder, Bad Trader 2020