This was the original notebook I used to analyze SqueezeMetrics and Spot Gamma data. It proved to be very profitable and I've since written a nice big post about how to use these signals to trade equity derivatives. Please read this report for some context and insight on this notebook.
import pandas as pd
import numpy as np
import re
import networkx as nx
import matplotlib.pyplot as plt
#Sci-Kit Learn Classification Trees
import sklearn as skl
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.linear_model import RANSACRegressor
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import tree
# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
from gensim.test.utils import datapath
import spacy
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from google.colab import drive
# Mount Google Drive inside the Colab runtime; every CSV read and figure
# written by this notebook uses a path under '/content/drive/My Drive/'.
drive.mount('/content/drive')
Just our usual suspects here.
Pandas, NumPy, MatPlotLib, you know the drill.
SKLearn so you can mess around with some basic machine learnings.
There's some cleaning to do with the data, but also some information we'd like to extract.
We also want comparable data between the squeeze and spot models.
# --- Load the three raw CSV exports ----------------------------------------
raw_squeeze_data = pd.read_csv('/content/drive/My Drive/Gamma Hunting/Spot Gamma/SqueezeMetrics.csv')
raw_spot_data = pd.read_csv('/content/drive/My Drive/Gamma Hunting/Spot Gamma/SpotGammaBacktestResults.csv')
raw_vix_data = pd.read_csv('/content/drive/My Drive/Gamma Hunting/Spot Gamma/VIXHistorical.csv')
print(raw_squeeze_data)

# NOTE: every `final_* = raw_*` assignment below is an alias, not a copy, so
# the derived columns are also added to the raw_* frames. Later cells rely on
# this (they read 'Week Ahead' straight from raw_spot_data), so do not
# replace the aliases with .copy().
#
# NOTE(review): shift(k) with k > 0 compares a row with the row k positions
# EARLIER in file order, shift(-1) with the next row. Whether 'Week Ahead' /
# 'Month Ahead' are forward- or backward-looking therefore depends on the sort
# order of each CSV -- TODO confirm against the files.

# --- VIX: intraday move plus 4-row and 20-row percentage changes -----------
DayMove = (raw_vix_data['VIX Close'] - raw_vix_data['VIX Open'])
DayPL = (raw_vix_data['VIX Close'] - raw_vix_data['VIX Open']) / raw_vix_data['VIX Open'] * 100
WeekAhead = (raw_vix_data['VIX Close'] - raw_vix_data['VIX Close'].shift(4)) / raw_vix_data['VIX Close'].shift(4) * 100
MonthAhead = (raw_vix_data['VIX Close'] - raw_vix_data['VIX Close'].shift(20)) / raw_vix_data['VIX Close'].shift(20) * 100
final_vix_data = raw_vix_data
final_vix_data['Week Ahead'] = WeekAhead
final_vix_data['Month Ahead'] = MonthAhead
final_vix_data['Day Move'] = DayMove
final_vix_data['Day PL'] = DayPL
print(raw_spot_data.columns)

# --- SqueezeMetrics: same features computed from the 'price' column --------
DayMove = (raw_squeeze_data['price'] - raw_squeeze_data['price'].shift(-1))
DayPL = (raw_squeeze_data['price'] - raw_squeeze_data['price'].shift(-1)) / raw_squeeze_data['price'].shift(-1) * 100
WeekAhead = (raw_squeeze_data['price'] - raw_squeeze_data['price'].shift(4)) / raw_squeeze_data['price'].shift(4) * 100
MonthAhead = (raw_squeeze_data['price'] - raw_squeeze_data['price'].shift(20)) / raw_squeeze_data['price'].shift(20) * 100
# Absolute (not percentage) price change; note it uses a 2-row shift, unlike
# the 4-row shift behind 'Week Ahead'.
WeekAheadFlat = raw_squeeze_data['price'] - raw_squeeze_data['price'].shift(2)
final_data = raw_squeeze_data
final_data['Week Ahead'] = WeekAhead
final_data['Week Ahead Flat'] = WeekAheadFlat
final_data['Month Ahead'] = MonthAhead
final_data['Day Move'] = DayMove
final_data['Day PL'] = DayPL
final_squeeze_data = final_data

# --- Spot Gamma: same features computed from the 'Underlying' column -------
DayMove = (raw_spot_data['Underlying'] - raw_spot_data['Underlying'].shift(-1))
DayPL = (raw_spot_data['Underlying'] - raw_spot_data['Underlying'].shift(-1)) / raw_spot_data['Underlying'].shift(-1) * 100
WeekAhead = (raw_spot_data['Underlying'] - raw_spot_data['Underlying'].shift(4)) / raw_spot_data['Underlying'].shift(4) * 100
MonthAhead = (raw_spot_data['Underlying'] - raw_spot_data['Underlying'].shift(20)) / raw_spot_data['Underlying'].shift(20) * 100
final_spot_data = raw_spot_data
final_spot_data['Week Ahead'] = WeekAhead
final_spot_data['Month Ahead'] = MonthAhead
final_spot_data['Day Move'] = DayMove
final_spot_data['Day PL'] = DayPL

# pd.to_datetime returns a new Series; the original code discarded the result,
# leaving the date columns as raw strings. Store the parsed values.
final_data['date'] = pd.to_datetime(final_data['date'])
final_vix_data['Date'] = pd.to_datetime(final_vix_data['Date'])
final_spot_data['Date'] = pd.to_datetime(final_spot_data['Date'])
Things look good so far. Data looks clean and appropriate.
Let's compare these indicators with each other. Matplotlib for Plotting.
def _fraction_of(subset, superset):
    """Return the row-count ratio subset/superset, or NaN if superset is empty."""
    total = superset.shape[0]
    return subset.shape[0] / total if total else float('nan')


def _scatter_gex_vs_return(figure_id, frame, return_column, y_label, output_path):
    """Scatter `frame['gex']` against `frame[return_column]`, save it, show it."""
    plt.figure(figure_id)
    plt.scatter(frame['gex'], frame[return_column])
    # NOTE(review): the label says "Billions", but `gex` is compared against
    # 4000000000 below, so the plotted values look like raw units -- confirm.
    plt.xlabel('Market Gamma (Billions)')
    plt.ylabel(y_label)
    plt.grid(color='g', linestyle='-', linewidth=0.3)
    plt.savefig(output_path)
    plt.show()


vix_sorted = final_vix_data.sort_values('VIX Close')
final_data = final_squeeze_data
# To run the same analysis on Spot Gamma data instead, alias its gamma column:
#final_data['gex'] = final_data['Total Gamma']
print(final_data.columns)

# Figure 6: every observation, GEX vs the 20-row return.
_scatter_gex_vs_return(
    6, final_data, 'Month Ahead', 'Market 20-Day P/L (%)',
    '/content/drive/My Drive/Gamma Hunting/Spot Gamma/SPOTGEXvs220DayReturns.png')

# How often is GEX above the "big gamma" line, and how often did those rows
# show a 20-row return above 0.33%?
big_gamma_limit = 4000000000 #4bn 4 000 000 000
big_gamma = final_data.loc[final_data['gex'] > big_gamma_limit]
action_ratio = _fraction_of(big_gamma, final_data)
print(action_ratio)
big_gamma_winners = big_gamma.loc[big_gamma['Month Ahead'] > 0.33]
# Guarded: big_gamma can be empty, which used to raise ZeroDivisionError.
winners_ratio = _fraction_of(big_gamma_winners, big_gamma)
print(winners_ratio)

print('\n\nNegative Gamma:')
negative_gamma = final_data.loc[final_data['gex'] < 0]
negative_ratio = _fraction_of(negative_gamma, final_data)
# Figure 10: negative-GEX rows only.
_scatter_gex_vs_return(
    10, negative_gamma, 'Month Ahead', 'Market 20-Day P/L (%)',
    '/content/drive/My Drive/Gamma Hunting/Spot Gamma/SPOTNegativeGEXvsMonthReturns.png')
print(negative_ratio)

# Figure 7: big-GEX rows vs the 20-row return.
_scatter_gex_vs_return(
    7, big_gamma, 'Month Ahead', 'Market 20-Day P/L (%)',
    '/content/drive/My Drive/Gamma Hunting/Spot Gamma/SPOTGammaGEXvsMonthReturns.png')

# Figure 8: big-GEX rows vs the 4-row return. The y-label previously said
# "20-Day" even though 'Week Ahead' (a 4-row change) is what gets plotted.
_scatter_gex_vs_return(
    8, big_gamma, 'Week Ahead', 'Market 4-Day P/L (%)',
    '/content/drive/My Drive/Gamma Hunting/Spot Gamma/SPOTGammaGEXvsWeekReturns.png')
Below is just some very bad practice backtesting on this data. Draws a line in the sand to enter a position and assumes it was profitable before assuming it was unprofitable.
Any conclusions from these results are misguided. But it was a nice way to peer into the signals and have a rough estimate on the predictive capabilities of GEX. Results were good which only encouraged me to look further.
Kept here for posterity.
##
##
# Starting account value; make_balances() compounds this module-level global.
balance = 10_000
def perform_backtest(row, gex_limit=4000000000, breakeven=25,
                     max_loss=-1000, multiplier=100):
    """Score one row of the crude "line in the sand" backtest.

    A position is only assumed when ``row['gex']`` exceeds ``gex_limit``.

    Args:
        row: Mapping-like row (e.g. a DataFrame row from ``apply(axis=1)``)
            providing ``'gex'`` and ``'Week Ahead Flat'``.
        gex_limit: GEX level that must be exceeded to take the trade.
        breakeven: 'Week Ahead Flat' move at which the trade breaks even.
        max_loss: Result returned when the move is below ``breakeven``.
        multiplier: Payout per unit of move beyond ``breakeven``.

    Returns:
        ``max_loss`` when the move falls short of ``breakeven``,
        ``multiplier * (move - breakeven)`` when it exceeds it, and 0 when no
        trade is taken, the move equals ``breakeven`` exactly, or either
        value is NaN (every comparison with NaN is False).
    """
    # Plain boolean logic replaces the bitwise `&` the original applied to
    # scalar comparisons; the defaults reproduce the hard-coded constants.
    if not row['gex'] > gex_limit:
        return 0
    flat_move = row['Week Ahead Flat']
    if flat_move < breakeven:
        return max_loss
    if flat_move > breakeven:
        return multiplier * (flat_move - breakeven)
    return 0
def perform_optionless(row):
    """Return the row's 'Week Ahead' value when GEX is above 4bn, else 0."""
    return row['Week Ahead'] if row['gex'] > 4000000000 else 0
def perform_long_backtest(row):
    """Score one row for the combined high-GEX / negative-GEX backtest.

    With GEX above 4bn: -1000 when 'Week Ahead' is below 0.4, otherwise
    1000 per unit of 'Week Ahead' above 0.4. With GEX below zero: 400 when
    the absolute 'Week Ahead' exceeds 0.5 and -1000 when it is smaller.
    Everything else (including exact ties and NaN) scores 0.
    """
    gamma = row['gex']
    weekly_return = row['Week Ahead']

    if gamma > 4000000000:
        if weekly_return < 0.4:
            return -1000
        if weekly_return > 0.4:
            return 1000 * (weekly_return - 0.4)
        return 0

    if gamma < 0:
        magnitude = abs(weekly_return)
        if magnitude > 0.5:
            return 400
        if magnitude < 0.5:
            return -1000

    return 0
def perform_straddle_backtest(row):
    """Score one row for the negative-GEX straddle backtest.

    Only rows with GEX below zero are traded: +1000 when the absolute
    'Week Ahead' value exceeds 0.5, -1000 when it is smaller, 0 otherwise.
    """
    move_size = abs(row['Week Ahead'])
    if not row['gex'] < 0:
        return 0
    if move_size > 0.5:
        return 1000
    if move_size < 0.5:
        return -1000
    return 0
def make_balances(row):
    """Compound the module-level ``balance`` by the row's 'Week Ahead' percent.

    Mutates the global ``balance``, prints the new value and returns it, so
    the result depends on the order in which rows are fed in.
    """
    global balance
    growth_factor = 1 + (0.01 * row['Week Ahead'])
    balance *= growth_factor
    print(balance)
    return balance
#...
#final_data['Optionless'] = final_data.apply(perform_optionless, axis=1)
#final_data['Backtest'] =
#final_data['Balance'] = final_data.apply(make_balances, axis=1)
#print(big_gamma['Backtest'])
#plt.figure(11)
#x = final_data['date']
#y = final_data['Balance']
#plt.plot(x, y)
#plt.xlabel('2004 - Present')
#plt.ylabel('Return')
#plt.grid(color = 'g', linestyle = '-', linewidth = 0.3)
#plt.savefig('/content/drive/My Drive/Gamma Hunting/Spot Gamma/BigGammaStraddleBacktestReturns.png')
#plt.show()
#...
It seems far more likely that VIX and GEX would share a relationship rather than GEX and SPX. Below I visualize how VIX performs based on the GEX print.
As well as what happens to SPX based on VIX + GEX.
# Trim the VIX history to the rows that line up with the SqueezeMetrics data.
# NOTE(review): the reversed frame is indexed *positionally* with the index
# labels of the reversed GEX frame; this presumably assumes both files have
# default integer indexes and opposite date ordering -- verify the alignment.
short_vix_data = final_vix_data.iloc[::-1]
short_gex_data = final_squeeze_data.iloc[::-1]
short_vix_data = short_vix_data.iloc[short_gex_data.index]
short_vix_data = short_vix_data.reset_index(drop=True)
# Reverse a second time, which restores the GEX frame's original row order.
short_gex_data = short_gex_data.iloc[::-1]
# Column assignment aligns on index labels.
short_vix_data['GEX'] = short_gex_data['gex']

# 2D: GEX against the VIX 20-row percentage change.
x = short_vix_data['GEX']
y = short_vix_data['Month Ahead']
plt.figure(0)
plt.scatter(x, y)
plt.xlabel('Market Gamma (Billions)')
plt.ylabel('VIX 20-Day P/L (%)')
plt.grid(color='g', linestyle='-', linewidth=0.3)
plt.savefig('/content/drive/My Drive/Gamma Hunting/Spot Gamma/GEXvsVIX20Day.png')
plt.show()

# 3D: 'Week Ahead' P/L (x) vs VIX close (y) vs GEX (z).
z = short_gex_data['gex']
y = short_vix_data['VIX Close']
x = short_gex_data['Week Ahead']
fig = plt.figure(0)
ax = fig.add_subplot(111, projection='3d')
# The labels now name the series actually placed on each axis; the original
# labelled x as 'GEX' and z as 'P/L', the reverse of what is plotted.
ax.set_xlabel('P/L')
ax.set_ylabel('VIX')
ax.set_zlabel('GEX')
ax.scatter(x, y, z)
plt.savefig('/content/drive/My Drive/Gamma Hunting/Spot Gamma/GEXvsVIXvsDayPL.png')
Spot Gamma data includes signals that represent things like massive open interest on a particular Call strike ('Resistance') or the price where Gamma flips negative, which it deems the 'Volatility Trigger'.
We look at them here.
# Slice the Spot Gamma rows by where the underlying sits relative to each level.
comfort = raw_spot_data.loc[raw_spot_data['Resistance'] > raw_spot_data['Underlying']]
volatile = raw_spot_data.loc[raw_spot_data['Volatility Trigger'] > raw_spot_data['Underlying']]
overbought = raw_spot_data.loc[raw_spot_data['Resistance'] < raw_spot_data['Underlying']]
# "Pinning": the 'High Gamma' level lies within +/-0.2% of the underlying.
pinning = raw_spot_data.loc[(raw_spot_data['High Gamma'] > raw_spot_data['Underlying'] * 0.998) & (raw_spot_data['High Gamma'] < raw_spot_data['Underlying'] * 1.002)]

# Figure 1: underlying below Resistance vs. below the Volatility Trigger.
plt.figure(1)
plt.grid()
comfort_plot = plt.scatter(comfort['Total Gamma'], comfort['Week Ahead'])
volatile_plot = plt.scatter(volatile['Total Gamma'], volatile['Week Ahead'])
# Each figure gets a legend for its own two series. The original drew one
# legend on figure 2 that also listed the figure-1 artists, leaving figure 1
# unlabelled and figure 2 with entries for points it does not contain.
plt.legend((comfort_plot, volatile_plot), ('Comfort', 'Vol'))

# Figure 2: underlying above Resistance vs. pinned to the High Gamma level.
plt.figure(2)
overbought_plot = plt.scatter(overbought['Total Gamma'], overbought['Week Ahead'])
pinning_plot = plt.scatter(pinning['Total Gamma'], pinning['Week Ahead'])
plt.grid()
plt.legend((overbought_plot, pinning_plot), ('OB', 'Pin'))
The only conclusion I could find here was that the Volatility Trigger is an excellent price point to avoid longs the strategy would have otherwise taken. It is clear in the first plot that the losing trades when GEX was positive were also the ones where SPX closed below the 'Volatility Trigger'.
Interesting, but Spot Gamma's data is very limited compared to Squeeze Metrics. I'll leave this here, a bit of a dead-end.
At this point I have written the original report which is posted on my website here
It has produced a profitable strategy, but I still want to explore this data. So I threw some ML at it and now the strategy is marginally better.
We begin with some classifiers and decision trees. The intention is to have them predict the direction of SPX, 4 days out.
These decision trees and regression algorithms are likely NOT going to return great results. You can learn more about them here, though.
# Feature matrix for the Spot Gamma classifiers; missing values become 0.
data = final_spot_data
# SqueezeMetrics alternative: data[['price', 'gex', 'dix']]
spot_feature_columns = [
    'Underlying',
    'High Gamma',
    'Resistance',
    'Volatility Trigger',
    'Put Wall',
    'Total Gamma',
    'Total Delta',
]
for_input = data[spot_feature_columns].fillna(0)
def upDown(row):
    """Binary label: 1 when the row's 'Week Ahead' is positive, otherwise 0.

    NaN compares False against 0, so rows without a 'Week Ahead' value are
    labelled 0 as well.
    """
    return 1 if row['Week Ahead'] > 0 else 0
# Label every row (1 = 'Week Ahead' > 0, else 0) and split chronologically by
# position: the first 85% of rows train, the remaining 15% test.
# NOTE(review): upDown labels rows with a NaN 'Week Ahead' as 0, so the rows
# lacking a 4-row lookback are counted as "down" -- confirm that is intended.
data['UpDown'] = data.apply(upDown, axis = 1)
signal = data[['UpDown']]
split = int(0.85 * (len(for_input)))
print(data.head())
print(split)
x_train, x_test, y_train, y_test = for_input[:split], for_input[split:], signal[:split], signal[split:]
print(for_input[:split])
print(y_train)

# scikit-learn estimators expect a 1-D target; passing the single-column
# DataFrame triggers a DataConversionWarning on every fit.
y_train_1d = y_train['UpDown']

# Other estimators tried here: DecisionTreeRegressor, LinearRegression,
# PassiveAggressiveClassifier, tree.ExtraTreeRegressor.
# IDEAL FOR SQUEEZE DATA :: RandomForestClassifier(n_estimators = 2000, max_depth = 8)
rls = RandomForestClassifier(n_estimators = 2000, max_depth = 8)
rls.fit(x_train, y_train_1d)
tls = Perceptron()
tls.fit(x_train, y_train_1d)
qls = DecisionTreeClassifier()
qls.fit(x_train, y_train_1d)
cls = PassiveAggressiveClassifier()
cls.fit(x_train, y_train_1d)

# Predict once per model and reuse the result for both printing and scoring.
rls_predictions = rls.predict(x_test)
tls_predictions = tls.predict(x_test)
qls_predictions = qls.predict(x_test)
cls_predictions = cls.predict(x_test)
print(rls_predictions)
print(tls_predictions)
print(qls_predictions)
print(cls_predictions)

print("Random Forest Classifier:", accuracy_score(y_test, rls_predictions))
# `tls` is a Perceptron; the original mislabelled this line as
# "Decision Tree Regressor".
print("Perceptron:", accuracy_score(y_test, tls_predictions))
print("Decision Tree Classifier:", accuracy_score(y_test, qls_predictions))
print("Passive Aggressive Classifier:", accuracy_score(y_test, cls_predictions))
As expected, a whole grab bag of signals with a wide range of accuracy results.
Doesn't seem like any particular model has converged and is able to produce good predictions.
When I originally began trading off this data, I decided to go long with cash secured puts on SPX, 20dte anytime SqueezeMetrics published Gamma that was >4bn.
This has been a very profitable strategy through 2018-2019. However, in earlier years this strategy didn't work because the market rarely hit 4bn in positive Gamma, if ever. That means that over time, SPX options and derivatives in general have evolved. I need something that may evolve with it.
Introducing K Means-Clusters!
Dynamically redraw arbitrary lines in the sand every time you get a new sample, or 1000 epochs' worth of times if you haven't drawn any lines at all yet!
You can learn about K Means-Clustering here.
In short, I'm going to be using composite averages to group data samples.
I will create 4 clusters because I have 4 fingers on my right hand, excluding my thumb.
Then, I will evaluate each cluster to attempt to find a reliable long SPX, short SPX, long Vega, short Vega signal. This should be possible because going long SPX when Gamma >4bn was possible and that's just a linear dichotomizer.
from sklearn.cluster import KMeans, MiniBatchKMeans
#short_vix_data
# Combine the VIX columns (minus the VIX-derived return columns) with the
# SqueezeMetrics frame; DataFrame.join aligns the two on their index.
cut_vix_data = short_vix_data.drop(['Week Ahead', 'Month Ahead', 'Day Move', 'Day PL'], axis=1)
merged_data = cut_vix_data.join(short_gex_data)
clusters = merged_data  # alias, not a copy
print(clusters.columns)

to_input = clusters[['GEX', 'dix', 'VIX Open']]
to_input = to_input.fillna(0)

# Create clusters based on the VIX Open, GEX and DIX values.
# NOTE(review): the features are fed in unscaled. GEX is compared against
# 4000000000 elsewhere in this notebook, so it presumably dwarfs DIX and VIX
# in the distance computation -- a likely reason the clusters split almost
# purely on GEX. Consider standardising the columns first.
# NOTE(review): init='random' without random_state means the numeric cluster
# ids can change between runs; code that selects a cluster by id (e.g.
# `cluster == 0`) is therefore not reproducible.
km = KMeans(n_clusters = 4, init = 'random').fit(to_input)
classes = pd.DataFrame(km.predict(to_input), columns = ['raw'])

# Attach 'Week Ahead' and the cluster id to the features for plotting.
# (The original's preceding `to_viz = to_input` was overwritten immediately
# and has been dropped.)
to_viz = pd.merge(to_input, clusters[['Week Ahead']], left_index=True, right_index=True)
to_viz['raw'] = classes['raw']

# Map each cluster id to a matplotlib colour code.
cond = [
    (classes['raw'] == 0),
    (classes['raw'] == 1),
    (classes['raw'] == 2),
    (classes['raw'] == 3),
    (classes['raw'] == 4)]
choices = ["b", "g", "k", "m", "c"]
# np.select's implicit default is the integer 0, which cannot share a dtype
# with string choices; newer NumPy releases raise TypeError for that mix.
# Every id produced by the model matches a condition, so this string default
# is only a dtype-compatible placeholder.
to_viz['marker'] = np.select(cond, choices, default='r')
print(to_viz['marker'].head())

x = to_viz['GEX']
y = to_viz['dix']
z = to_viz['VIX Open']
pl = to_viz['Week Ahead']

# 3D view of the clusters in feature space.
fig = plt.figure(0)
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('GEX')
ax.set_ylabel('DIX')
ax.set_zlabel('VIX')
ax.scatter(x, y, z, c=to_viz['marker'].tolist())
# Three camera angles are cycled; only the last one set is active when the
# figure is saved below.
ax.view_init(30, 0)
plt.draw()
ax.view_init(30, 30)
plt.draw()
ax.view_init(0, 90)
plt.draw()
plt.savefig('/content/drive/My Drive/Gamma Hunting/Spot Gamma/GEXvsVIXvsDIX.png')

# 2D views: GEX and DIX against 'Week Ahead', coloured by cluster.
fig = plt.figure(1)
ax = fig.add_subplot()
ax.scatter(x, pl, c=to_viz['marker'].tolist())
fig = plt.figure(2)
ax = fig.add_subplot()
ax.scatter(y, pl, c=to_viz['marker'].tolist()) #Visualize

# Persist the cluster id and colour on the merged frame for later cells.
merged_data['cluster'] = to_viz['raw']
merged_data['cluster name'] = to_viz['marker']
Wild to see how VIX has almost no dictation on how the KMeans clusters were established.
GEX on the other hand is almost entirely the separation process. KMeans has essentially done the same "line in the sand" process here.
Both models individually didn't provide any insight that goes beyond what we could already produce. Us as humans are pretty good at pattern recognition already, I suppose adding some ML wasn't going to improve on that much.
What it may improve on though, is itself. As time goes on a threshold of 2bn, 3bn, then 4bn, was required to grow in accordance to the market and preserve the excess returns of this strategy.
In this strategy, the cluster id number will be attached to each data sample as it is fed to a random forest classifier. It is here where the model will be assisted by the "dynamically moving thresholds" that are the kmeans cluster labels.
A random forest trained exclusively on the cluster that includes the highest GEX values consistently outperforms a manual strategy of buying when GEX >4bn.
As such, this final piece will combine the k-clustering and decision trees to hopefully produce a model which may evolve in time with the market.
Only time will tell.
print(merged_data.columns)

# Keep only the rows assigned to cluster 0.
# Alternative tried: (merged_data['cluster'] == 3) | (merged_data['cluster'] == 2)
in_cluster_zero = merged_data['cluster'] == 0
best_only = merged_data.loc[in_cluster_zero]
print(best_only.shape)
print(best_only.head(5))

# Feature matrix for the cluster-specific random forest; NaNs become 0.
# SqueezeMetrics-only alternative: data[['price', 'gex', 'dix']]
# Note that 'cluster' is constant here because of the filter above.
cluster_feature_columns = ['VIX Open', 'GEX', 'price', 'dix', 'cluster']
for_input = best_only[cluster_feature_columns].fillna(0)
def upDownDay(row):
    """Return 1 if the row's 'Week Ahead' value is above zero, else 0.

    A NaN 'Week Ahead' fails the comparison and therefore yields 0.
    """
    is_up = row['Week Ahead'] > 0
    return int(is_up)
# Label rows: 1 when 'Week Ahead' > 0, else 0.
merged_data['UpDown'] = merged_data.apply(upDownDay, axis = 1)
# best_only was produced by a boolean .loc selection; take an explicit copy
# before adding a column so pandas does not emit SettingWithCopyWarning.
best_only = best_only.copy()
best_only['UpDown'] = best_only.apply(upDownDay, axis = 1)
signal = best_only[['UpDown']]

# Positional 85% / 15% train/test split.
split = int(0.85 * (len(for_input)))
x_train, x_test, y_train, y_test = for_input[:split], for_input[split:], signal[:split], signal[split:]
# The bare `x_train.shape` / `x_test.shape` expressions only display in a
# notebook cell; print them so the sizes show up when run as a script too.
print(x_train.shape)
print(x_test.shape)

# IDEAL FOR SQUEEZE DATA :: RandomForestClassifier(n_estimators = 2000, max_depth = 8)
rls = RandomForestClassifier(n_estimators = 2000, max_depth = 8)
# Pass a 1-D target; fitting on the single-column DataFrame triggers a
# DataConversionWarning.
rls.fit(x_train, y_train['UpDown'])
print("Random Forest Accuracy:", accuracy_score(y_test, rls.predict(x_test)))

# One probability column per class seen during training.
predictions = pd.DataFrame(rls.predict_proba(x_test))
print(y_test.shape)
print(predictions.shape)
# Re-index the test labels from 0 so they line up with `predictions`, which
# was built from a bare array and therefore has a fresh 0..n-1 index.
y_test = y_test.reset_index()
print(best_only[['date']])
predictions['result'] = y_test['UpDown']
def prediction(row):
    """Return 0 when ``row[0]`` is above 0.7, otherwise 1."""
    return 0 if row[0] > 0.7 else 1
# Preview the first five rows of the probability table and of the cluster subset.
for frame in (predictions, best_only):
    print(frame.head(5))
Again, more robust backtesting is always required, but this little library is a great way to visualize the behaviour of the model on a chart.
Returns look good here, but it needs more robust training before it deserves some real money.
Of course, those final touches are done OFF the public internet.
!pip install backtesting
from backtesting import Backtest
from backtesting.lib import SignalStrategy, TrailingStrategy
# Build a pseudo-OHLC frame for backtesting.py: only closing prices are
# available, so Open, High, Low and Close all carry the same series.
historical_SPX_closes = raw_squeeze_data['price']
print(historical_SPX_closes.head(5))
column_names = ['Open','High','Low','Close']
backtest_data = pd.DataFrame({name: historical_SPX_closes for name in column_names})
backtest_dates = raw_squeeze_data['date']
#backtest_data.index = raw_squeeze_data['date']
print(backtest_data.head(5))

# One entry per bar: 1 when that bar's date belongs to the selected cluster
# (best_only), else 0. This replaces a nested O(n^2) loop whose trailing
# `append(0)` did not produce exactly one value per date under any reading of
# the (lost) indentation, so the signal could not line up with the price bars.
k_signal = backtest_dates.isin(best_only['date']).astype(int).tolist()
print(k_signal)

# NOTE(review): the original built rf_signal with the very same membership
# test as k_signal -- it does not consult the random forest's predictions.
# Kept equivalent here; wire in the model output if that was the intent.
rf_signal = list(k_signal)
print(rf_signal)
class kCluster(SignalStrategy, TrailingStrategy):
    """Strategy driven by the module-level ``k_signal`` list, with a trailing stop."""

    def init(self):
        """Register the entry signal and a trailing stop-loss of 4."""
        super().init()
        # Debug aid: show the index the backtest engine is running on.
        print(self.data.index)
        entry_signal = k_signal
        stop_multiple = 4
        self.set_signal(entry_signal)
        self.set_trailing_sl(stop_multiple)
class random_forest(SignalStrategy, TrailingStrategy):
    """Strategy driven by the module-level ``rf_signal`` list, with a trailing stop."""

    def init(self):
        """Register the entry signal and a trailing stop-loss of 2."""
        super().init()
        # Debug aid: show the index the backtest engine is running on.
        print(self.data.index)
        entry_signal = rf_signal
        stop_multiple = 2
        self.set_signal(entry_signal)
        self.set_trailing_sl(stop_multiple)
# Run and chart the cluster-membership strategy with a 0.2% commission.
# Only kCluster is exercised here; the random_forest strategy is defined
# above but never run.
commission_rate = 0.002
backtest = Backtest(backtest_data, kCluster, commission=commission_rate)
backtest.run()
backtest.plot()
What about other classification methods? What about a walk-forward to guarantee that the k-means clusters will "update the threshold" the way you intend/hope? What about the Long/Short Vega Signals?
What about 'em? I'm a firm believer in not prostituting a strategy. So for those who got this far and are still asking, shoot me an email!
trent.rand@pm.me, trent.rand@gmail.com
Just like my favourite volatility trader, I'll be walking straight to the bank.