Neural Pricer (MLP)
PyTorch MLP trained to mimic the American binomial pricer on ~100k generated rows (uniform sampling of \(S, K \in [20, 500]\), \(\sigma \in [0.05, 1]\), \(r \in [0, 0.10]\), \(T \in [0.05, 2]\), up to 3 dividends per option with 70% probability of having any). Two design choices that matter:
- Feature engineering: instead of feeding raw \((S, K, D_i, t_i)\), the inputs are scale-invariant ratios: moneyness \(S/K\), intrinsic ratio \(\max(S-K, 0)/K\), dividend yields \(D_i/S\), and dividend timing ratios \(t_i/T\). This makes a \(\$100\) stock and a \(\$400\) stock with the same moneyness and dividend yields look the same to the network.
- Scale-invariant target: the network predicts \(C/K\) rather than \(C\) directly, so the loss isn’t dominated by the high-strike tail.
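The architecture is a residual MLP with four hidden layers and SiLU activations, followed by a linear output layer. It is trained with Huber loss, the AdamW optimizer, and a ReduceLROnPlateau learning-rate scheduler.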
import argparse
import json
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
# Dividend slots (1-based suffixes) for which per-dividend columns exist.
_DIVIDEND_SLOTS = (1, 2)

# Names of the raw input columns: five scalar option parameters followed by
# an (amount, time) pair for every dividend slot.
RAW_COLS = [
    "initialStockPrice",
    "strikePrice",
    "volatility",
    "interestRate",
    "totalTime",
] + [
    f"dividend_{field}_{slot}"
    for slot in _DIVIDEND_SLOTS
    for field in ("amt", "time")
]

# Names of the model inputs, in the column order of the feature matrix.
ENGINEERED_FEATS = [
    "moneyness",        # S / K
    "intrinsic_ratio",  # max(S - K, 0) / K
    "volatility",
    "interestRate",
    "totalTime",
] + [
    # div_yield_i = D_i / S, div_time_ratio_i = t_i / T
    f"{stem}_{slot}"
    for slot in _DIVIDEND_SLOTS
    for stem in ("div_yield", "div_time_ratio")
]

# Column holding the option price used to build the regression target.
TARGET_COL = "optionPrice"
class MLPRegressor(nn.Module):
    """Residual MLP that maps a feature vector to a single scalar output.

    Layer widths are ``in_dim -> 128 -> 128 -> 128 -> 64 -> 1`` with a SiLU
    after every hidden layer.  The two middle 128-wide layers form one
    residual block: the skip connection is added to the block output before
    the block's final activation.
    """

    def __init__(self, in_dim: int):
        """Create the layers.

        Args:
            in_dim: Number of input features per sample.
        """
        super().__init__()
        wide, narrow = 128, 64
        # Attribute names and creation order are deliberate: the names are
        # the state_dict keys, and the order fixes how a seeded RNG
        # initialises the weights.
        self.input_layer = nn.Linear(in_dim, wide)
        self.layer1 = nn.Linear(wide, wide)
        self.layer2 = nn.Linear(wide, wide)
        self.layer3 = nn.Linear(wide, narrow)
        self.output_layer = nn.Linear(narrow, 1)
        self.activation = nn.SiLU()

    def forward(self, x):
        """Return the network output for ``x`` (last dimension ``in_dim``)."""
        act = self.activation
        stem = act(self.input_layer(x))
        # Residual block: two linear layers, skip added before the activation.
        branch = self.layer2(act(self.layer1(stem)))
        merged = act(branch + stem)
        head = act(self.layer3(merged))
        return self.output_layer(head)
def engineer_features(df):
    """Build the scale-invariant model inputs from raw option columns.

    Args:
        df: DataFrame containing ``initialStockPrice``, ``strikePrice``,
            ``volatility``, ``interestRate``, ``totalTime`` and, for each
            dividend slot 1 and 2, ``dividend_amt_<i>`` and
            ``dividend_time_<i>``.  The frame is read only; no columns are
            added to it.

    Returns:
        A ``float32`` NumPy array with one row per row of ``df`` and one
        column per entry of ``ENGINEERED_FEATS``, in that order.
    """
    spot = df["initialStockPrice"]
    strike = df["strikePrice"]
    maturity = df["totalTime"]

    # Work on a copy so the caller's DataFrame is not mutated as a hidden
    # side effect of feature construction.
    feats = df[["volatility", "interestRate", "totalTime"]].copy()
    feats["moneyness"] = spot / strike
    feats["intrinsic_ratio"] = np.maximum(0, spot - strike) / strike

    for slot in (1, 2):
        # A missing dividend (NaN amount or NaN time) means "no dividend":
        # encode it as 0 in BOTH ratios.  Leaving a NaN in the yield would
        # propagate into the network inputs and turn the loss into NaN.
        feats[f"div_yield_{slot}"] = (df[f"dividend_amt_{slot}"] / spot).fillna(0)
        feats[f"div_time_ratio_{slot}"] = (
            df[f"dividend_time_{slot}"] / maturity
        ).fillna(0)

    return feats[ENGINEERED_FEATS].astype(np.float32).to_numpy()
def fit_standardizer(X):
    """Compute per-column centering and scaling statistics.

    Args:
        X: 2-D array with one row per sample and one column per feature.

    Returns:
        ``(mean, std)`` taken along axis 0.  Any column whose standard
        deviation is below ``1e-12`` gets a scale of ``1.0`` so that
        dividing by it leaves the (constant) column finite.
    """
    center = X.mean(axis=0)
    spread = X.std(axis=0)
    is_constant = spread < 1e-12
    scale = np.where(is_constant, 1.0, spread)
    return center, scale
def standardize(X, mean, std):
    """Return ``X`` shifted by ``mean`` and divided by ``std``.

    ``mean`` and ``std`` are combined with ``X`` using ordinary
    element-wise arithmetic (with broadcasting where shapes differ).
    """
    centered = X - mean
    return centered / std
def main():
    """Train the MLP pricer from a CSV and write the checkpoint and metadata.

    Command-line options: ``--csv``, ``--epochs``, ``--batch_size``,
    ``--lr``, ``--val_frac``, ``--seed`` and ``--out_dir``.

    Files written to ``--out_dir``:
        * ``metadata.json`` - column names and the standardisation
          statistics fitted on the training split.
        * ``model.pt`` - ``state_dict`` of the epoch with the lowest
          validation loss.

    Raises:
        ValueError: If the data set is too small for the requested split,
            i.e. the training or the validation part would be empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", type=str, default="options.csv")
    parser.add_argument("--epochs", type=int, default=50)
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--val_frac", type=float, default=0.2)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--out_dir", type=str, default="artifacts")
    args = parser.parse_args()
    if not 0.0 < args.val_frac < 1.0:
        parser.error("--val_frac must be strictly between 0 and 1")

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    os.makedirs(args.out_dir, exist_ok=True)

    df = pd.read_csv(args.csv)
    X = engineer_features(df)
    # Target is price / strike so the loss is not dominated by high strikes.
    y = (df[TARGET_COL] / df["strikePrice"]).astype(np.float32).to_numpy().reshape(-1, 1)

    N = len(df)
    n_train = int((1 - args.val_frac) * N)
    if n_train == 0 or n_train == N:
        # An empty split would otherwise surface later as a division by
        # zero when averaging the epoch loss.
        raise ValueError(
            f"cannot split {N} rows with val_frac={args.val_frac}: "
            "training and validation sets must both be non-empty"
        )

    # Shuffle before splitting (reproducible through np.random.seed above)
    # so an ordered CSV cannot make the validation set unrepresentative.
    order = np.random.permutation(N)
    train_idx, val_idx = order[:n_train], order[n_train:]
    X_train_raw, y_train = X[train_idx], y[train_idx]
    X_val_raw, y_val = X[val_idx], y[val_idx]

    # Statistics come from the training split only, to avoid leaking
    # validation information into the inputs.
    x_mean, x_std = fit_standardizer(X_train_raw)
    # Force float32 so the tensors match the model's parameter dtype
    # regardless of NumPy's scalar promotion rules.
    X_train = np.asarray(standardize(X_train_raw, x_mean, x_std), dtype=np.float32)
    X_val = np.asarray(standardize(X_val_raw, x_mean, x_std), dtype=np.float32)

    # Write the metadata before training: a checkpoint saved mid-run is
    # then never left without the statistics needed to use it.
    stats = {
        "raw_cols": RAW_COLS,
        "engineered_feats": ENGINEERED_FEATS,
        "x_mean": x_mean.tolist(),
        "x_std": x_std.tolist(),
        "note": "Model predicts (Price / Strike). Multiply by Strike for $ value.",
    }
    with open(os.path.join(args.out_dir, "metadata.json"), "w", encoding="utf-8") as f:
        json.dump(stats, f, indent=2)

    train_loader = DataLoader(
        TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train)),
        batch_size=args.batch_size, shuffle=True,
    )
    val_loader = DataLoader(
        TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val)),
        batch_size=args.batch_size, shuffle=False,
    )

    # Take the input width from the data instead of hard-coding it, so it
    # always matches the engineered feature matrix.
    model = MLPRegressor(in_dim=X_train.shape[1])
    criterion = nn.HuberLoss(delta=1.0)
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=5, factor=0.5)

    def run_epoch(loader, train: bool):
        """Run one pass over ``loader``; return the sample-weighted mean loss."""
        model.train(train)
        total, count = 0.0, 0
        for xb, yb in loader:
            if train:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()
            else:
                with torch.no_grad():
                    loss = criterion(model(xb), yb)
            # Weight by batch size so a short final batch is not over-counted.
            total += loss.item() * xb.size(0)
            count += xb.size(0)
        return total / count

    best_val = float("inf")
    checkpoint_path = os.path.join(args.out_dir, "model.pt")
    for epoch in range(1, args.epochs + 1):
        tr = run_epoch(train_loader, train=True)
        va = run_epoch(val_loader, train=False)
        scheduler.step(va)
        print(f"epoch {epoch:03d} | train loss {tr:.8f} | val loss {va:.8f}")
        # Keep only the weights of the best epoch seen so far.
        if va < best_val:
            best_val = va
            torch.save(model.state_dict(), checkpoint_path)


if __name__ == "__main__":
    main()