Imports#

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

import os

import sys

sys.path.insert(1, "./../")


from pathlib import Path


from settings import config

import level_1_filters as f1
import level_2_filters as f2
import level_3_filters as f3
from datetime import date


import warnings
import plotly.io as pio

pio.templates.default = "plotly_white"
warnings.filterwarnings("ignore")
OUTPUT_DIR = Path(config("OUTPUT_DIR"))
DATA_DIR = Path(config("DATA_DIR"))
WRDS_USERNAME = config("WRDS_USERNAME")

START_DATE_01 = date(1996, 1, 1)
END_DATE_01 = date(2012, 1, 31)

START_DATE_02 = date(2012, 2, 1)
END_DATE_02 = date(2024, 12, 31)

NOTE_START = START_DATE_01
NOTE_END = END_DATE_01
DATE_RANGE = f"{pd.Timestamp(START_DATE_01):%Y-%m}_{pd.Timestamp(END_DATE_02):%Y-%m}"

Function Definitions#

def compare_filtered_data_to_orig(filtered_df, orig_df, filter_name="Identical Filter"):
    print("|", filter_name, ":")
    print(
        f">> Records removed: {orig_df.shape[0] - filtered_df.shape[0]:,.0f} out of {orig_df.shape[0]:,.0f} ({(orig_df.shape[0] - filtered_df.shape[0]) / orig_df.shape[0]:,.2%})"
    )
    print(
        f">> Filtered data shape: {filtered_df.shape[0]:,.0f} rows // {filtered_df.shape[1]:,.0f} columns"
    )

Data Note#

  • The original CJS 2013 paper used data from 1986 through 2012 (26 years of data).

  • Due to the unavailability of SPX option data from 1985 to 1995, we replicated the 54 CJS portfolios using data from January 1996 to December 2019 (23 years).

  • Our dataset (from 1996 to 2019) comprises over 19.2 million rows of SPX options data.

  • The original effectiveness of the data filters was examined in The Puzzle of Filtering Index Options (Desai, Hammock, Holt; 2024). Due to similar reasons as outlined in that work (loss of data filter elegance when transposed across timeframes), we expect that the data filter parameters (and thus the portfolios constructed) will not yield identical results to the original published work, and the user should not have this expectation.

The spirit of this project is to replicate with the highest practical fidelity the process of data filtration and portfolio construction in the original CJS and HKM papers, without commenting on the effectiveness or appropriateness of the process and parameters. We leave that analysis to a future study.

file_path = Path(DATA_DIR / "options" / "data_1996-01_2019-12.parquet")

if file_path.exists():
    print(">> File already exists. Loading data from file...")
    # Load the data from the file
    raw_option_data = pd.read_parquet(file_path)
else:
    print(">> File does not exist. Loading data from WRDS...")
    # Load the data from WRDS
    raw_option_data = pd.concat(
        [
            pd.read_parquet(
                Path(DATA_DIR / "options" / "data_1996-01_2012-01.parquet")
            ),
            pd.read_parquet(
                Path(DATA_DIR / "options" / "data_2012-02_2019-12.parquet")
            ),
        ]
    )
    raw_option_data.to_parquet(file_path)

# add the mid price
raw_option_data["mid_price"] = (
    raw_option_data["best_bid"] + raw_option_data["best_offer"]
) / 2
# adjust strike price
raw_option_data["strike_price"] /= 1000
# calc moneyness
raw_option_data = f1.calc_moneyness(raw_option_data)
# rename IV column
raw_option_data.rename(columns={"impl_volatility": "IV"}, inplace=True)

raw_option_data
>> File already exists. Loading data from file...
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness
0 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 NaN 5.04 0.0 1704.0 55.000 56.000 565.0 100.0 55.500 0.914684
1 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 NaN 5.04 0.0 108.0 75.500 76.500 540.0 100.0 76.000 0.874211
2 108105.0 1996-01-05 617.70 616.71 C 1996-02-17 NaN 5.03 500.0 5900.0 57.750 58.750 560.0 100.0 58.250 0.908044
3 108105.0 1996-01-08 616.71 618.46 C 1996-03-16 NaN 5.03 0.0 315.0 145.500 146.500 475.0 100.0 146.000 0.768037
4 108105.0 1996-01-09 618.46 609.45 C 1996-12-21 NaN 5.01 0.0 8.0 188.375 189.375 425.0 100.0 188.875 0.697350
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
66102 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 944.700 962.900 4200.0 100.0 953.800 1.299996
66103 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1042.000 1060.300 4300.0 100.0 1051.150 1.330948
66104 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1140.000 1158.200 4400.0 100.0 1149.100 1.361900
66105 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1237.900 1256.100 4500.0 100.0 1247.000 1.392853
66106 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1335.900 1354.100 4600.0 100.0 1345.000 1.423805

19213898 rows × 16 columns

# Histogram parameters
counts, bins, patches = plt.hist(raw_option_data["date"], bins=30)

# Apply plasma_r colormap
cmap = cm.get_cmap("plasma_r", len(patches))
normed = (counts - counts.min()) / (counts.max() - counts.min())
for patch, norm in zip(patches, normed):
    patch.set_facecolor(cmap(norm))

# Labels and aesthetics
plt.xlabel("Date")
plt.yscale("log")
plt.grid()
plt.ylabel("Number of Options (log scale)")
plt.title("SPX Options Traded Over Time")
plt.xticks(rotation=45)
plt.tight_layout()

# Save before show
plt.savefig(OUTPUT_DIR / "spx_options_over_time.png")
plt.show()
../_images/b974df993ab52c9842451a015525f150bbfff59d5aa7ffe85f645fa139ff884e.png
f3.build_raw_iv_chart(raw_option_data, DATE_RANGE)
../_images/13c7e6d7f19d416878e161ed14670599aaf725a766c243735b089fff32d1c598.png

1. Data Filtration#

In order to minimize possible quoting errors, CJS filtered the raw options data through 3 levels of filters. The filters are applied to the trade-in (buy) side to make sure the portfolios are buying into reliable quotes. When positions are exited, if there is no quote in the filtered data, the raw data is searched. These filters are detailed in Appendix B of CJS.

Level 1 Filters#

  • Identical Filter: Retain only one instance of quotes with the same option type, strike price, expiration date/maturity, and price.

  • Identical Except Price Filter: There are a few sets of quotes with identical terms (type, strike, and maturity) but different prices. Keep the quote whose T-bill-based implied volatility is closest to that of its moneyness neighbors, and delete the others.

  • Bid = 0 Filter: Drop quotes with a bid price of zero, thereby avoiding low-valued options. Also, a zero bid may indicate censoring as negative bids cannot be recorded.

  • Volume = 0 Filter: Drop quotes of zero for volumes. Note: Appendix B of CJS does not explicitly detail this filter, but we include it here since it is included in Table B.1. Filters of CJS.

Identical Filter#

We drop records with identical option type, strike, expiration date, and price. In each such case, we eliminate all but one of the quotes.

spx_filtered = f1.identical_filter(raw_option_data)
spx_filtered
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness
0 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 NaN 5.04 0.0 1704.0 55.000 56.000 565.0 100.0 55.500 0.914684
1 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 NaN 5.04 0.0 108.0 75.500 76.500 540.0 100.0 76.000 0.874211
2 108105.0 1996-01-05 617.70 616.71 C 1996-02-17 NaN 5.03 500.0 5900.0 57.750 58.750 560.0 100.0 58.250 0.908044
3 108105.0 1996-01-08 616.71 618.46 C 1996-03-16 NaN 5.03 0.0 315.0 145.500 146.500 475.0 100.0 146.000 0.768037
4 108105.0 1996-01-09 618.46 609.45 C 1996-12-21 NaN 5.01 0.0 8.0 188.375 189.375 425.0 100.0 188.875 0.697350
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
66102 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 944.700 962.900 4200.0 100.0 953.800 1.299996
66103 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1042.000 1060.300 4300.0 100.0 1051.150 1.330948
66104 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1140.000 1158.200 4400.0 100.0 1149.100 1.361900
66105 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1237.900 1256.100 4500.0 100.0 1247.000 1.392853
66106 108105.0 2019-12-31 3215.18 3230.78 P 2020-12-31 NaN 1.52 0.0 0.0 1335.900 1354.100 4600.0 100.0 1345.000 1.423805

18936796 rows × 16 columns

compare_filtered_data_to_orig(spx_filtered, raw_option_data, "Identical Filter")
| Identical Filter :
>> Records removed: 277,102 out of 19,213,898 (1.44%)
>> Filtered data shape: 18,936,796 rows // 16 columns

Identical Except Price Filter#

We drop records with identical terms (type, strike, and maturity) but different prices. Retained quotes are those whose T-bill-based implied volatility is closest to that of its moneyness neighbors, and delete the others.

spx_filtered_2 = f1.identical_but_price_filter(spx_filtered)
spx_filtered_2
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness
0 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 NaN 5.04 0.0 1704.0 55.00 56.00 565.0 100.0 55.50 0.914684
1 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 NaN 5.04 0.0 108.0 75.50 76.50 540.0 100.0 76.00 0.874211
2 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.106792 5.04 150.0 5633.0 24.75 25.75 600.0 100.0 25.25 0.971345
3 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 NaN 5.04 0.0 90.0 48.25 49.25 570.0 100.0 48.75 0.922778
4 108105.0 1996-01-04 621.32 617.70 C 1996-06-22 0.123411 5.04 0.0 195.0 38.50 39.50 595.0 100.0 39.00 0.963251
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16379461 108105.0 2019-12-31 3215.18 3230.78 P 2020-01-17 0.426246 1.52 24.0 719.0 0.25 0.35 2545.0 100.0 0.30 0.787735
16379462 108105.0 2019-12-31 3215.18 3230.78 P 2020-02-21 0.188281 1.52 22.0 231.0 14.30 14.60 2985.0 100.0 14.45 0.923925
16379463 108105.0 2019-12-31 3215.18 3230.78 P 2020-03-20 0.234228 1.52 26.0 1003.0 8.50 8.70 2730.0 100.0 8.60 0.844997
16379464 108105.0 2019-12-31 3215.18 3230.78 P 2020-04-17 0.134421 1.52 0.0 651.0 92.90 93.80 3230.0 100.0 93.35 0.999759
16379465 108105.0 2019-12-31 3215.18 3230.78 P 2020-05-15 0.138240 1.52 0.0 0.0 107.20 108.30 3230.0 100.0 107.75 0.999759

16379466 rows × 16 columns

compare_filtered_data_to_orig(
    spx_filtered_2, spx_filtered, "Identical Except Price Filter:"
)
| Identical Except Price Filter: :
>> Records removed: 2,557,330 out of 18,936,796 (13.50%)
>> Filtered data shape: 16,379,466 rows // 16 columns

Filter Options with Bid = 0#

We drop quotes with bids of zero (implying little to no market interest) and thereby avoiding low-valued options.

spx_filtered_3 = f1.delete_zero_bid_filter(spx_filtered_2)
compare_filtered_data_to_orig(spx_filtered_3, spx_filtered_2, "Delete Zero Bid Filter")
| Delete Zero Bid Filter :
>> Records removed: 1,341,403 out of 16,379,466 (8.19%)
>> Filtered data shape: 15,038,063 rows // 16 columns
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Plot distribution of best_bid for optm_l1_id
axes[0].hist(spx_filtered_2["best_bid"], bins=30)
axes[0].set_xlabel("Best Bid")
axes[0].set_ylabel("Frequency")
axes[0].set_title("Distribution of Best Bid - Pre-filter")

# Plot distribution of best_bid for optm_l1_zbid
axes[1].hist(spx_filtered_3["best_bid"], bins=30, color="darkred")
axes[1].set_xlabel("Best Bid")
axes[1].set_ylabel("Frequency")
axes[1].set_title("Distribution of Best Bid - Post-filter")

plt.tight_layout()
plt.show()
print("Need some better plots here...")
../_images/54928d6ce41bd4449d6ac629083b5de41251df3ba1741c621d6ce12b50639156.png
Need some better plots here...

Filter Options with Vol = 0#

Table B.1 of CJS 2013 appears to signal the inclusion of a Volume = 0 filter, however, Appendix B does not describe this filter. We note that there are a significant number of quotes with zero volume in our dataset, and the application of this filter would dramatically skew the results from the original dataset (with 70% of remaining records dropped using this criterion). Given that Appendix B of CJS 2013 did not describe this filter, we assume its inclusion in Table B.1 was an error.

spx_filtered_4 = f1.delete_zero_volume_filter(spx_filtered_3)
compare_filtered_data_to_orig(
    spx_filtered_4, spx_filtered_3, "Delete Zero Volume Filter"
)
| Delete Zero Volume Filter :
>> Records removed: 10,570,785 out of 15,038,063 (70.29%)
>> Filtered data shape: 4,467,278 rows // 16 columns
spx_filtered_4 = spx_filtered_3.copy(deep=True)
zero_vol_rows = spx_filtered_3[spx_filtered_3["volume"] == 0]
plt.hist(zero_vol_rows["date"], bins=30)
plt.xlabel("Date")
plt.ylabel("Count")
plt.title("Distribution of Options with Zero Volume")
plt.show()
../_images/66e97210fadf9f0b19fad7e1cfbdfea22c6d0cd5d74aaa4d55e98f2dff579a5c.png
# save L1 filtered data
spx_l1_filtered = spx_filtered_4.copy(deep=True)
spx_l1_filtered.to_parquet(
    Path(DATA_DIR / "options" / f"L1_filtered_{DATE_RANGE}.parquet"), index=False
)
f3.build_l1_iv_chart(spx_l1_filtered, date_range=DATE_RANGE)
../_images/4fd631a962f412ed0b13902fbf937e0618fb4af711777828055e385fde621b69.png

Level 2 Filters#

  • Days to Maturity <7 or >180 Filter: Drop options with fewer than seven or more than 180 calendar days to expiration.

  • IV<5% or >100% Filter: We remove all option quotes with implied volatilities lower than 5% or higher than 100%, computed using T-bill interest rates.

  • Moneyness <0.8 or >1.2 Filter: We remove all option quotes with moneyness, the ratio of strike price to index price, below 0.8 or above 1.2. These options have little value beyond their intrinsic value and are also very thinly traded.

  • Implied Interest Rate <0 Filter: When filtering outliers, we use T-bill interest rates to compute implied volatilities. T-bill interest rates are obtained from the Federal Reserve’s H.15 release. We assign a T-bill rate to each observation by assuming that we can use the next shortest rate if the time to expiration of the option is shorter than the shortest constant maturity rate. Our goal is to obtain an interest rate that is as close as possible to the one faced by investors in the options market. It appears that the T-bill rates are not the relevant ones when pricing these options. Specifically, when the T-bill rates are used, put and call implied volatilities do not line up very well; for example, the T-bill rate tends to be too high for short maturity options, perhaps because no T-bill has maturity of less than a month. To address these issues, we compute a put-call parity-implied interest rate. Since we believe that put-call parity holds reasonably well in this deep and liquid European options market, we use the put-call parity-implied interest rate as our interest rate in the remainder of the paper and for further filters. To construct this rate, we take all put-call pairs of a given maturity and impose put-call parity using the bid-ask midpoint as the price, and allowing the interest rate to adjust. We remove 89,563 pairs with a negative implied interest rate. We then take the median-implied interest rate across all remaining pairs of the same maturity with moneyness between 0.95 and 1.05 and assign it to all quotes with that maturity. We are able to directly assign an implied interest rate to 93% of our sample in this way. We fill in the gaps by interpolating across maturities and if necessary, across days. Our implied interest rate is on average 54 bps above the T-bill rate

  • Unable to Compute IV Filter: We remove quotes that imply negative time value.

Days to Maturity <7 or >180 Filter#

We drop records with days to maturity less than 7 and greater than 180 days. The short maturity options tend to move erratically close to expiration and the long maturity options lack volume and open interest.

spx_filtered_5 = f2.days_to_maturity_filter(spx_filtered_4, min_days=7, max_days=180)
spx_filtered_5
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity
0 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 NaN 5.04 0.0 1704.0 55.00 56.00 565.0 100.0 55.50 0.914684 72 days
1 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 NaN 5.04 0.0 108.0 75.50 76.50 540.0 100.0 76.00 0.874211 16 days
2 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.106792 5.04 150.0 5633.0 24.75 25.75 600.0 100.0 25.25 0.971345 72 days
3 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 NaN 5.04 0.0 90.0 48.25 49.25 570.0 100.0 48.75 0.922778 44 days
4 108105.0 1996-01-04 621.32 617.70 C 1996-06-22 0.123411 5.04 0.0 195.0 38.50 39.50 595.0 100.0 39.00 0.963251 170 days
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16379461 108105.0 2019-12-31 3215.18 3230.78 P 2020-01-17 0.426246 1.52 24.0 719.0 0.25 0.35 2545.0 100.0 0.30 0.787735 17 days
16379462 108105.0 2019-12-31 3215.18 3230.78 P 2020-02-21 0.188281 1.52 22.0 231.0 14.30 14.60 2985.0 100.0 14.45 0.923925 52 days
16379463 108105.0 2019-12-31 3215.18 3230.78 P 2020-03-20 0.234228 1.52 26.0 1003.0 8.50 8.70 2730.0 100.0 8.60 0.844997 80 days
16379464 108105.0 2019-12-31 3215.18 3230.78 P 2020-04-17 0.134421 1.52 0.0 651.0 92.90 93.80 3230.0 100.0 93.35 0.999759 108 days
16379465 108105.0 2019-12-31 3215.18 3230.78 P 2020-05-15 0.138240 1.52 0.0 0.0 107.20 108.30 3230.0 100.0 107.75 0.999759 136 days

10659473 rows × 17 columns

compare_filtered_data_to_orig(spx_filtered_5, spx_filtered_4, "Days to Maturity Filter")
| Days to Maturity Filter :
>> Records removed: 4,378,590 out of 15,038,063 (29.12%)
>> Filtered data shape: 10,659,473 rows // 17 columns

IV<5% or >100% Filter#

We drop quotes with implied volatilities lower than 5% or higher than 100%, computed using T-bill interest rates of the nearest matching maturity. Such extreme IV values are likely a quotation problem or attached to low-value options.

spx_filtered_6 = f2.iv_range_filter(spx_filtered_5, min_iv=0.05, max_iv=1.0)
spx_filtered_6
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity
2 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.106792 5.04 150.0 5633.0 24.750 25.750 600.0 100.0 25.250 0.971345 72 days
4 108105.0 1996-01-04 621.32 617.70 C 1996-06-22 0.123411 5.04 0.0 195.0 38.500 39.500 595.0 100.0 39.000 0.963251 170 days
6 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 0.093700 5.04 13.0 1364.0 10.625 11.375 615.0 100.0 11.000 0.995629 44 days
7 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 0.092695 5.04 0.0 1702.0 0.250 0.500 655.0 100.0 0.375 1.060385 44 days
10 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.101465 5.04 463.0 5037.0 9.125 9.875 625.0 100.0 9.500 1.011818 72 days
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16379461 108105.0 2019-12-31 3215.18 3230.78 P 2020-01-17 0.426246 1.52 24.0 719.0 0.250 0.350 2545.0 100.0 0.300 0.787735 17 days
16379462 108105.0 2019-12-31 3215.18 3230.78 P 2020-02-21 0.188281 1.52 22.0 231.0 14.300 14.600 2985.0 100.0 14.450 0.923925 52 days
16379463 108105.0 2019-12-31 3215.18 3230.78 P 2020-03-20 0.234228 1.52 26.0 1003.0 8.500 8.700 2730.0 100.0 8.600 0.844997 80 days
16379464 108105.0 2019-12-31 3215.18 3230.78 P 2020-04-17 0.134421 1.52 0.0 651.0 92.900 93.800 3230.0 100.0 93.350 0.999759 108 days
16379465 108105.0 2019-12-31 3215.18 3230.78 P 2020-05-15 0.138240 1.52 0.0 0.0 107.200 108.300 3230.0 100.0 107.750 0.999759 136 days

9586861 rows × 17 columns

compare_filtered_data_to_orig(spx_filtered_6, spx_filtered_5, "IV Range Filter")
| IV Range Filter :
>> Records removed: 1,072,612 out of 10,659,473 (10.06%)
>> Filtered data shape: 9,586,861 rows // 17 columns

Moneyness <0.8 or >1.2 Filter#

We remove all option quotes with moneyness (\(\frac{K}{S}\)) below 0.8 or above 1.2. These options have little value beyond their intrinsic value and are also very thinly traded.

spx_filtered_7 = f2.moneyness_filter(
    spx_filtered_6, min_moneyness=0.8, max_moneyness=1.2
)
spx_filtered_7
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity
0 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.106792 5.04 150.0 5633.0 24.750 25.750 600.0 100.0 25.250 0.971345 72 days
1 108105.0 1996-01-04 621.32 617.70 C 1996-06-22 0.123411 5.04 0.0 195.0 38.500 39.500 595.0 100.0 39.000 0.963251 170 days
2 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 0.093700 5.04 13.0 1364.0 10.625 11.375 615.0 100.0 11.000 0.995629 44 days
3 108105.0 1996-01-04 621.32 617.70 C 1996-02-17 0.092695 5.04 0.0 1702.0 0.250 0.500 655.0 100.0 0.375 1.060385 44 days
4 108105.0 1996-01-04 621.32 617.70 C 1996-03-16 0.101465 5.04 463.0 5037.0 9.125 9.875 625.0 100.0 9.500 1.011818 72 days
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7657970 108105.0 2019-12-31 3215.18 3230.78 P 2020-05-29 0.120268 1.52 0.0 0.0 561.300 575.500 3800.0 100.0 568.400 1.176187 150 days
7657971 108105.0 2019-12-31 3215.18 3230.78 P 2020-02-21 0.188281 1.52 22.0 231.0 14.300 14.600 2985.0 100.0 14.450 0.923925 52 days
7657972 108105.0 2019-12-31 3215.18 3230.78 P 2020-03-20 0.234228 1.52 26.0 1003.0 8.500 8.700 2730.0 100.0 8.600 0.844997 80 days
7657973 108105.0 2019-12-31 3215.18 3230.78 P 2020-04-17 0.134421 1.52 0.0 651.0 92.900 93.800 3230.0 100.0 93.350 0.999759 108 days
7657974 108105.0 2019-12-31 3215.18 3230.78 P 2020-05-15 0.138240 1.52 0.0 0.0 107.200 108.300 3230.0 100.0 107.750 0.999759 136 days

7657975 rows × 17 columns

compare_filtered_data_to_orig(spx_filtered_7, spx_filtered_6, "Moneyness Filter")
| Moneyness Filter :
>> Records removed: 1,928,886 out of 9,586,861 (20.12%)
>> Filtered data shape: 7,657,975 rows // 17 columns

Implied Interest Rate < 0 Filter#

We remove all option quotes with negative implied interest rates, assuming put-call parity.

spx_filtered_8 = f2.implied_interest_rate_filter(spx_filtered_7)
spx_filtered_8
 |-- PCP filter: Check ok --> Underlying prices, strike prices of put and call options match exactly.
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity pc_parity_int_rate
0 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.282774 5.04 8.0 6914.0 0.0625 0.1250 540.0 100.0 0.09375 0.874211 16 days 0.015898
1 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.265845 5.04 0.0 3897.0 0.0625 0.1250 545.0 100.0 0.09375 0.882305 16 days 0.015898
2 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.266303 5.04 0.0 3259.0 0.1250 0.1875 550.0 100.0 0.15625 0.890400 16 days 0.015898
3 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.255099 5.04 0.0 862.0 0.1250 0.2500 555.0 100.0 0.18750 0.898494 16 days 0.015898
4 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.247617 5.04 100.0 7438.0 0.1875 0.3125 560.0 100.0 0.25000 0.906589 16 days 0.015898
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5137994 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.110925 1.52 20.0 310.0 222.8000 228.9000 3425.0 100.0 225.85000 1.060116 171 days 0.014769
5137995 108105.0 2019-12-31 3215.18 3230.78 C 2020-06-19 0.108577 1.52 400.0 3483.0 24.6000 25.1000 3450.0 100.0 24.85000 1.067854 171 days 0.014769
5137996 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.108540 1.52 0.0 12.0 241.2000 247.6000 3450.0 100.0 244.40000 1.067854 171 days 0.014769
5137997 108105.0 2019-12-31 3215.18 3230.78 C 2020-06-19 0.106848 1.52 36.0 227.0 19.6000 20.1000 3475.0 100.0 19.85000 1.075592 171 days 0.014769
5137998 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.106814 1.52 0.0 0.0 260.9000 267.5000 3475.0 100.0 264.20000 1.075592 171 days 0.014769

5137999 rows × 18 columns

compare_filtered_data_to_orig(
    spx_filtered_8, spx_filtered_7, "Negative Implied Interest Rate Filter"
)
| Negative Implied Interest Rate Filter :
>> Records removed: 2,519,976 out of 7,657,975 (32.91%)
>> Filtered data shape: 5,137,999 rows // 18 columns

Unable to Compute IV Filter#

We remove all option quotes that imply negative time value.

spx_filtered_9 = f2.unable_to_compute_iv_filter(spx_filtered_8)
spx_filtered_9
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity pc_parity_int_rate intrinsic
0 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.282774 5.04 8.0 6914.0 0.0625 0.1250 540.0 100.0 0.09375 0.874211 16 days 0.015898 0.00
1 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.265845 5.04 0.0 3897.0 0.0625 0.1250 545.0 100.0 0.09375 0.882305 16 days 0.015898 0.00
2 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.266303 5.04 0.0 3259.0 0.1250 0.1875 550.0 100.0 0.15625 0.890400 16 days 0.015898 0.00
3 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.255099 5.04 0.0 862.0 0.1250 0.2500 555.0 100.0 0.18750 0.898494 16 days 0.015898 0.00
4 108105.0 1996-01-04 621.32 617.70 P 1996-01-20 0.247617 5.04 100.0 7438.0 0.1875 0.3125 560.0 100.0 0.25000 0.906589 16 days 0.015898 0.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5137994 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.110925 1.52 20.0 310.0 222.8000 228.9000 3425.0 100.0 225.85000 1.060116 171 days 0.014769 194.22
5137995 108105.0 2019-12-31 3215.18 3230.78 C 2020-06-19 0.108577 1.52 400.0 3483.0 24.6000 25.1000 3450.0 100.0 24.85000 1.067854 171 days 0.014769 0.00
5137996 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.108540 1.52 0.0 12.0 241.2000 247.6000 3450.0 100.0 244.40000 1.067854 171 days 0.014769 219.22
5137997 108105.0 2019-12-31 3215.18 3230.78 C 2020-06-19 0.106848 1.52 36.0 227.0 19.6000 20.1000 3475.0 100.0 19.85000 1.075592 171 days 0.014769 0.00
5137998 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.106814 1.52 0.0 0.0 260.9000 267.5000 3475.0 100.0 264.20000 1.075592 171 days 0.014769 244.22

4657476 rows × 19 columns

compare_filtered_data_to_orig(
    spx_filtered_9, spx_filtered_8, "Unable to Compute IV Filter"
)
| Unable to Compute IV Filter :
>> Records removed: 480,523 out of 5,137,999 (9.35%)
>> Filtered data shape: 4,657,476 rows // 19 columns
# save L2 filtered data
spx_l2_filtered = spx_filtered_9.copy(deep=True)
spx_l2_filtered.to_parquet(
    Path(DATA_DIR / "options" / f"L2_filtered_{DATE_RANGE}.parquet"), index=False
)
f3.build_l2_iv_chart(spx_l2_filtered, date_range=DATE_RANGE)
../_images/ae9dcb0d9b0cb867858665ade28ff89f41cf9b6e518d7abf2eabcd5c11fb6630.png

Level 3 Filters#

  • IV Filter: The IV filter removes volatility outliers to reduce the prevalence of apparent butterfly arbitrage.

  • Put-Call Parity Filter: The puts and calls need to be matched up based on trading date, expiry date, and option type.

IV Filter#

The IV filter removes volatility outliers to reduce the prevalence of apparent butterfly arbitrage. This involves dropping calls and puts that have the same expiration date and strike price, but have anomalous prices due to extreme implied volatility values. For each date and maturity, we fit a quadratic curve to the implied volatility of puts and calls (separately) through the observed log implied volatilities.

l2_data_with_fit, spx_filtered_10 = f3.IV_filter(spx_filtered_9, DATE_RANGE)
spx_filtered_10
 
>> Running IV filter...
 |-- IV filter: applying quadratic fit...
 |-- !! Execution time: apply_quadratic_iv_fit --> 26.82598 seconds
 |-- IV filter: filtering outliers...
 |-- IV filter: saving L3 IV-filtered data...
secid date open close cp_flag exdate IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price moneyness days_to_maturity pc_parity_int_rate intrinsic log_iv fitted_iv rel_distance_iv moneyness_bin stdev_iv_moneyness_bin is_outlier_iv
date exdate cp_flag
1996-01-04 1996-01-20 C 14 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 0.082711 5.04 444.0 5905.0 10.0000 10.375 610.0 100.0 10.18750 0.987534 16 days 0.015898 7.70 -2.492403 -2.377298 4.841838 (0.975, 1.0] 3.516840 False
16 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 0.109019 5.04 465.0 4270.0 7.5000 8.000 615.0 100.0 7.75000 0.995629 16 days 0.015898 2.70 -2.216233 -2.353701 -5.840486 (0.975, 1.0] 3.516840 False
18 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 0.101986 5.04 2606.0 6635.0 4.5000 4.875 620.0 100.0 4.68750 1.003723 16 days 0.015898 0.00 -2.282920 -2.330581 -2.045020 (1.0, 1.025] 5.219336 False
20 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 0.097356 5.04 4022.0 5969.0 1.1875 1.375 630.0 100.0 1.28125 1.019913 16 days 0.015898 0.00 -2.329381 -2.285771 1.907866 (1.0, 1.025] 5.219336 False
22 108105.0 1996-01-04 621.32 617.70 C 1996-01-20 0.101756 5.04 1627.0 6224.0 0.6250 0.750 635.0 100.0 0.68750 1.028007 16 days 0.015898 0.00 -2.285177 -2.264082 0.931727 (1.025, 1.05] 4.396845 False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-31 2020-06-19 P 5137990 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.117063 1.52 0.0 395.0 192.2000 193.000 3375.0 100.0 192.60000 1.044639 171 days 0.014769 144.22 -2.145043 -2.132215 0.601630 (1.025, 1.05] 4.396845 False
5137992 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.113900 1.52 0.0 163.0 208.2000 209.100 3400.0 100.0 208.65000 1.052377 171 days 0.014769 169.22 -2.172434 -2.165558 0.317525 (1.05, 1.075] 5.236635 False
5137994 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.110925 1.52 20.0 310.0 222.8000 228.900 3425.0 100.0 225.85000 1.060116 171 days 0.014769 194.22 -2.198901 -2.199596 -0.031616 (1.05, 1.075] 5.236635 False
5137996 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.108540 1.52 0.0 12.0 241.2000 247.600 3450.0 100.0 244.40000 1.067854 171 days 0.014769 219.22 -2.220637 -2.234330 -0.612848 (1.05, 1.075] 5.236635 False
5137998 108105.0 2019-12-31 3215.18 3230.78 P 2020-06-19 0.106814 1.52 0.0 0.0 260.9000 267.500 3475.0 100.0 264.20000 1.075592 171 days 0.014769 244.22 -2.236666 -2.269758 -1.457925 (1.075, 1.1] 5.723928 False

4301341 rows × 25 columns

compare_filtered_data_to_orig(spx_filtered_10, spx_filtered_9, "IV Filter")
| IV Filter :
>> Records removed: 356,135 out of 4,657,476 (7.65%)
>> Filtered data shape: 4,301,341 rows // 25 columns
f3.build_l2_iv_chart(l2_data_with_fit, date_range=DATE_RANGE)
../_images/4f8a05cce2f80b45220a691c3e24114a1ea73bc249585a7ee3c48b5d61ced5a8.png
f3.build_l3_iv_chart(spx_filtered_10, DATE_RANGE)
../_images/82a96b60b0785d9aaad4915e2ddec903ca66906f78ca5ea5a67d0c79271873c5.png

Put-Call Parity Filter#

The puts and calls need to be matched up based on trading date, expiry date, and option type. We then calculate the put-call parity implied interest rate, and filter out outliers based on the standard deviation of the relative distance between the interest rate implied by put-call parity, and the calculated daily median 3-month T-bill rate from the pulled data.

spx_filtered_11 = f3.put_call_filter(spx_filtered_10, DATE_RANGE)
spx_filtered_11
 
>> Running PCP filter...
 |-- PCP filter: calculating bid-ask midpoint...
 |-- PCP filter: building put-call pairs...
 |-- PCP filter: calculating PCP implied interest rate...
 |-- PCP filter: Check ok --> Underlying prices, strike prices of put and call options match exactly.
 |-- PCP filter: filtering outliers...
 |-- PCP filter: building L3 final filtered options chart...
 |-- PCP filter complete.
secid open close cp_flag IV tb_m3 volume open_interest best_bid best_offer strike_price contract_size mid_price days_to_maturity pc_parity_int_rate intrinsic log_iv fitted_iv rel_distance_iv moneyness_bin stdev_iv_moneyness_bin is_outlier_iv
date exdate moneyness
1996-01-04 1996-01-20 0.987534 108105.0 621.32 617.70 C 0.082711 5.04 444.0 5905.0 10.0000 10.375 610.0 100.0 10.18750 16 days 0.015898 7.70 -2.492403 -2.377298 4.841838 (0.975, 1.0] 3.516840 False
1.019913 108105.0 621.32 617.70 C 0.097356 5.04 4022.0 5969.0 1.1875 1.375 630.0 100.0 1.28125 16 days 0.015898 0.00 -2.329381 -2.285771 1.907866 (1.0, 1.025] 5.219336 False
1.028007 108105.0 621.32 617.70 C 0.101756 5.04 1627.0 6224.0 0.6250 0.750 635.0 100.0 0.68750 16 days 0.015898 0.00 -2.285177 -2.264082 0.931727 (1.025, 1.05] 4.396845 False
1.036102 108105.0 621.32 617.70 C 0.100588 5.04 0.0 6593.0 0.1875 0.375 640.0 100.0 0.28125 16 days 0.015898 0.00 -2.296722 -2.242870 2.401027 (1.025, 1.05] 4.396845 False
1996-02-17 0.963251 108105.0 621.32 617.70 C 0.071852 5.04 3.0 34.0 25.2500 26.250 595.0 100.0 25.75000 44 days 0.014622 22.70 -2.633147 -2.563785 2.705450 (0.95, 0.975] 2.301503 False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-31 2020-06-19 1.044639 108105.0 3215.18 3230.78 P 0.117063 1.52 0.0 395.0 192.2000 193.000 3375.0 100.0 192.60000 171 days 0.014769 144.22 -2.145043 -2.132215 0.601630 (1.025, 1.05] 4.396845 False
1.052377 108105.0 3215.18 3230.78 P 0.113900 1.52 0.0 163.0 208.2000 209.100 3400.0 100.0 208.65000 171 days 0.014769 169.22 -2.172434 -2.165558 0.317525 (1.05, 1.075] 5.236635 False
1.060116 108105.0 3215.18 3230.78 P 0.110925 1.52 20.0 310.0 222.8000 228.900 3425.0 100.0 225.85000 171 days 0.014769 194.22 -2.198901 -2.199596 -0.031616 (1.05, 1.075] 5.236635 False
1.067854 108105.0 3215.18 3230.78 P 0.108540 1.52 0.0 12.0 241.2000 247.600 3450.0 100.0 244.40000 171 days 0.014769 219.22 -2.220637 -2.234330 -0.612848 (1.05, 1.075] 5.236635 False
1.075592 108105.0 3215.18 3230.78 P 0.106814 1.52 0.0 0.0 260.9000 267.500 3475.0 100.0 264.20000 171 days 0.014769 244.22 -2.236666 -2.269758 -1.457925 (1.075, 1.1] 5.723928 False

3195886 rows × 22 columns

compare_filtered_data_to_orig(
    spx_filtered_11, spx_filtered_10, "Put-Call Parity Filter"
)
| Put-Call Parity Filter :
>> Records removed: 1,105,455 out of 4,301,341 (25.70%)
>> Filtered data shape: 3,195,886 rows // 22 columns
# save L3 filtered data
spx_l3_filtered = spx_filtered_11.copy(deep=True)
spx_l3_filtered.to_parquet(
    Path(DATA_DIR / "options" / f"L3_filtered_{DATE_RANGE}.parquet"), index=False
)
f3.build_l3_iv_pcp_chart(spx_l3_filtered, DATE_RANGE)
../_images/796da945b08b97a487c2c7cdf2c8c8948adc80867e490ffe46d86eee9a85ef04.png
# save final cleaned data to folder
spx_filtered_final = spx_filtered_11.copy(deep=True)

final_savefile = DATA_DIR / "options" / f"spx_filtered_final_{DATE_RANGE}.parquet"

try:
    spx_filtered_final.to_parquet(final_savefile, index=True)
    print(f">> Final filtered data saved to {final_savefile}")
except FileNotFoundError:
    print(f">> {DATA_DIR} does not exist. Creating directory...")
    os.makedirs(DATA_DIR, exist_ok=True)
    spx_filtered_final.to_parquet(final_savefile, index=True)
    print(f">> Final filtered data saved to {final_savefile}")
>> Final filtered data saved to /Users/jbejarano/GitRepositories/ftsfr/_data/options/spx_filtered_final_1996-01_2024-12.parquet