Files
quant/main.py
Gahow Wang 149a00c458 chore: backtest engine fee model, metrics, and strategy fixes
- main.py: add IBKR-style tiered fee schedule (fee_base + fee_per_share),
  PIT universe support, and open-to-close execution improvements
- metrics.py: add raw_summary helper for JSON-safe metric export
- Misc strategy fixes: deprecation warnings, NaN handling

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 20:57:56 +08:00

319 lines
13 KiB
Python

import argparse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import data_manager
import factor_attribution
import metrics
import universe_history as uh
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.buy_and_hold import BuyAndHoldStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.inverse_vol import InverseVolatilityStrategy
from strategies.mean_reversion import MeanReversionStrategy
from strategies.momentum import MomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
from universe import UNIVERSES
# ---------------------------------------------------------------------------
# Backtest engine
# ---------------------------------------------------------------------------
def backtest(
strategy,
data: pd.DataFrame,
initial_capital: float = 100_000,
transaction_cost: float = 0.001,
fixed_fee: float = 0.0,
fee_base: float = 0.0,
fee_per_share: float = 0.0,
open_data: pd.DataFrame | None = None,
) -> pd.Series:
"""
Vectorized backtest.
Parameters
----------
strategy : Strategy
Any class implementing generate_signals(data) → DataFrame of weights.
data : pd.DataFrame
Adjusted close prices, one column per asset.
initial_capital : float
Starting portfolio value.
transaction_cost : float
One-way cost per unit of turnover (e.g. 0.001 = 10 bps).
fixed_fee : float
Floor of the per-trade fee (e.g. 2.0 = $2 minimum per buy/sell).
With fee_per_share=0 (default), this is also the actual per-trade fee.
fee_base : float
Fixed component of a per-share tiered fee schedule. The actual
per-trade fee is ``max(fixed_fee, fee_base + fee_per_share * shares)``.
fee_per_share : float
Per-share variable component of the tiered fee (e.g. 0.009 = $0.009/share).
With fee_base=1.88 + fee_per_share=0.009 + fixed_fee=2.0 you get an
IBKR-style schedule: max(2, 1.88 + 0.009 * shares).
open_data : pd.DataFrame, optional
Open prices. When provided, enables open-to-close execution mode.
Returns
-------
pd.Series
Daily equity curve.
"""
if open_data is not None:
# Open-to-close mode:
# Strategy's shift(1) on open prices gives: weights[t] = f(open_{t-1})
# But open_t is known at morning of day t, so undo shift to get f(open_t)
# Then execute at close_t, earning close_t → close_{t+1}
weights = strategy.generate_signals(open_data)
weights = weights.shift(-1).fillna(0.0)
weights = weights.reindex(data.index).fillna(0.0)
positions = weights
# Returns earned: close[t] → close[t+1], weighted by positions decided at open[t]
close_returns = data.pct_change().fillna(0.0)
portfolio_returns = (close_returns * positions.shift(1).fillna(0.0)).sum(axis=1)
else:
# Classic close-to-close mode
weights = strategy.generate_signals(data)
weights = weights.reindex(data.index).fillna(0.0)
positions = weights
daily_returns = data.pct_change().fillna(0.0)
portfolio_returns = (daily_returns * positions).sum(axis=1)
# Turnover cost: sum of absolute weight changes each day
turnover = positions.diff().abs().sum(axis=1).fillna(0.0)
portfolio_returns -= turnover * transaction_cost
# Per-trade fee. Supports both flat ($2/trade) and tiered (IBKR-style)
# schedules: fee = max(fixed_fee, fee_base + fee_per_share * shares).
if fixed_fee > 0 or fee_base > 0 or fee_per_share > 0:
weight_changes = positions.diff().fillna(0.0)
equity_running = (1 + portfolio_returns).cumprod() * initial_capital
eq_prev = equity_running.shift(1).fillna(initial_capital)
if fee_per_share > 0:
# Convert per-ticker weight change into share count traded.
# dollar_traded[i, t] = |w[i,t] - w[i,t-1]| * equity[t-1]
# shares_traded[i, t] = dollar_traded / price[i, t]
dollar_traded = weight_changes.abs().mul(eq_prev, axis=0)
shares_traded = dollar_traded.div(data).replace(
[np.inf, -np.inf], 0.0,
).fillna(0.0)
per_trade_fee = (fee_base + fee_per_share * shares_traded).clip(
lower=fixed_fee,
)
trade_mask = weight_changes.abs() > 1e-8
per_trade_fee = per_trade_fee.where(trade_mask, 0.0)
daily_fee = per_trade_fee.sum(axis=1)
else:
n_trades = (weight_changes.abs() > 1e-8).sum(axis=1)
daily_fee = n_trades * fixed_fee
fee_impact = daily_fee / eq_prev
portfolio_returns -= fee_impact
equity = (1 + portfolio_returns).cumprod() * initial_capital
return equity
# ---------------------------------------------------------------------------
# Visualization
# ---------------------------------------------------------------------------
def plot_results(results: pd.DataFrame) -> None:
    """Show equity curves stacked above their drawdowns.

    Parameters
    ----------
    results : pd.DataFrame
        One equity curve per column. The index is used as the shared x-axis
        (labelled "Date").

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Drawdown = fractional distance of each curve below its running peak.
    running_peak = results.cummax()
    drawdowns = results.div(running_peak) - 1

    # Top panel gets three times the height of the bottom one.
    _fig, axes = plt.subplots(
        2, 1,
        figsize=(14, 9),
        sharex=True,
        gridspec_kw={"height_ratios": [3, 1]},
    )
    equity_ax, drawdown_ax = axes

    # --- Top panel: equity curves ---
    for name in results.columns:
        equity_ax.plot(results.index, results[name], label=name, linewidth=1.5)
    equity_ax.set_title("Strategy Comparison — Equity Curves", fontsize=14)
    equity_ax.set_ylabel("Portfolio Value ($)")
    equity_ax.legend(loc="upper left")
    equity_ax.grid(True, alpha=0.3)

    # --- Bottom panel: drawdowns, in percent ---
    for name in drawdowns.columns:
        drawdown_ax.plot(
            drawdowns.index, drawdowns[name] * 100, label=name, linewidth=1.0,
        )
    drawdown_ax.set_title("Drawdowns")
    drawdown_ax.set_ylabel("Drawdown (%)")
    drawdown_ax.set_xlabel("Date")
    drawdown_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Command-line entry point: load prices, backtest every strategy, report.

    Steps, in order:
      1. Parse CLI options (market, capital, fees, execution mode, ...).
      2. Resolve the ticker universe; for the "us" market also pull in every
         historical index member so a point-in-time (PIT) mask can be applied.
      3. Load close (and optionally open) prices via ``data_manager.update``.
      4. Optionally slice to the last N years, then apply the PIT mask.
      5. Run ``backtest`` for each configured strategy and add the benchmark.
      6. Print performance metrics, optionally run factor attribution, and
         optionally plot the equity curves.
    """
    parser = argparse.ArgumentParser(description="Run strategy backtest")
    parser.add_argument(
        "--market", choices=UNIVERSES.keys(), default="us",
        help="Market universe to backtest (default: us)",
    )
    parser.add_argument(
        "--capital", type=float, default=None,
        help="Initial capital (default: 10000)",
    )
    parser.add_argument(
        "--top-n", type=int, default=None,
        help="Number of stocks for selective strategies (default: ~10%% of universe)",
    )
    parser.add_argument(
        "--years", type=int, default=None,
        help="Limit backtest to last N years of data",
    )
    parser.add_argument(
        "--no-plot", action="store_true",
        help="Skip plotting charts",
    )
    parser.add_argument(
        "--fixed-fee", type=float, default=0.0,
        help="Fixed dollar cost per trade, e.g. 2.0 means $2 per buy or sell",
    )
    # Tiered (IBKR-style) schedule: fee = max(fixed_fee, fee_base + fee_per_share * shares).
    # Both default to 0.0, which leaves the flat --fixed-fee behaviour untouched.
    parser.add_argument(
        "--fee-base", type=float, default=0.0,
        help="Fixed component of a tiered per-trade fee, e.g. 1.88",
    )
    parser.add_argument(
        "--fee-per-share", type=float, default=0.0,
        help="Per-share component of a tiered per-trade fee, e.g. 0.009",
    )
    parser.add_argument(
        "--execution", choices=["close", "open-close"], default="close",
        help="Execution mode: 'close' (default, signal & execute on close) or "
             "'open-close' (signal on morning open, execute at close)",
    )
    parser.add_argument(
        "--attribution", action="store_true",
        help="Run factor attribution after performance metrics",
    )
    parser.add_argument(
        "--attribution-model", choices=["capm", "ff5", "ff5plus", "all"], default="all",
        help="Factor model selection for attribution output",
    )
    parser.add_argument(
        "--attribution-export", default=None,
        help="Directory to export factor attribution CSVs",
    )
    args = parser.parse_args()

    initial_capital = args.capital if args.capital is not None else 10_000
    use_open = args.execution == "open-close"

    universe = UNIVERSES[args.market]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    benchmark_label = universe["benchmark_label"]

    # PIT universe: include all historical index members for US market
    pit_intervals = None
    if args.market == "us":
        pit_intervals = uh.load_sp500_history()
        historical_tickers = uh.all_tickers_ever(pit_intervals)
        all_tickers = sorted(set(tickers + historical_tickers + [benchmark]))
        print(f"--- PIT universe: {len(all_tickers)} tickers (current + historical members) ---")
    else:
        all_tickers = sorted(set(tickers + [benchmark]))

    # With with_open=True the loader result is unpacked as (close, open);
    # otherwise it is used directly as the close-price frame.
    result = data_manager.update(args.market, all_tickers, with_open=use_open)
    if use_open:
        data, open_data = result
    else:
        data = result
        open_data = None

    if args.years:
        cutoff = data.index[-1] - pd.DateOffset(years=args.years)
        data = data[data.index >= cutoff]
        if open_data is not None:
            open_data = open_data[open_data.index >= cutoff]
        print(f"--- Sliced to last {args.years} years: {data.index[0].date()} to {data.index[-1].date()} ---")

    # Apply PIT mask: NaN out prices for non-member dates
    if pit_intervals is not None:
        print("--- Applying PIT membership mask (survivorship-bias fix) ---")
        data = uh.mask_prices(data, pit_intervals)
        if open_data is not None:
            open_data = uh.mask_prices(open_data, pit_intervals)

    # Filter tickers to only those with any valid data
    if pit_intervals is not None:
        tickers = [t for t in data.columns if t != benchmark and data[t].notna().any()]
    else:
        tickers = [t for t in tickers if t in data.columns]
    print(f"--- Universe: {len(tickers)} stocks + {benchmark} benchmark ---")

    # A missing or zero --top-n falls back to ~10% of the universe, minimum 5.
    top_n = args.top_n if args.top_n else max(5, len(tickers) // 10)
    print(f"--- Selective strategies will pick top {top_n} stocks ---")
    if args.fixed_fee > 0:
        print(f"--- Fixed fee: ${args.fixed_fee:.2f} per trade ---")
    if args.fee_base > 0 or args.fee_per_share > 0:
        print(
            f"--- Tiered fee: ${args.fee_base:.2f} + "
            f"${args.fee_per_share:.4f}/share per trade ---"
        )
    if use_open:
        print("--- Execution: open-close (signal on open, execute at close) ---")

    # Build strategy tuples: (strategy, close_data, open_data_or_None)
    open_tickers = open_data[tickers] if open_data is not None else None
    open_all = open_data
    strategies = {
        # --- Original strategies ---
        "Buy & Hold (EW)": (BuyAndHoldStrategy(), data[tickers], open_tickers),
        "Momentum": (MomentumStrategy(lookback=252, skip=21, top_n=top_n), data[tickers], open_tickers),
        "Inverse Volatility": (InverseVolatilityStrategy(vol_window=20), data[tickers], open_tickers),
        # Multi-Factor is the only strategy handed the full frame (benchmark column included).
        "Multi-Factor": (MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n), data, open_all),
        # --- New strategies ---
        "Mean Reversion": (MeanReversionStrategy(top_n=top_n), data[tickers], open_tickers),
        "Trend Following": (TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n), data[tickers], open_tickers),
        "Dual Momentum": (DualMomentumStrategy(top_n=top_n), data[tickers], open_tickers),
        "Momentum+Quality": (MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n), data[tickers], open_tickers),
        "Mom+InvVol": (AdaptiveMomentumStrategy(top_n=top_n), data[tickers], open_tickers),
        "Recovery+Mom Top20": (RecoveryMomentumStrategy(top_n=min(20, top_n)), data[tickers], open_tickers),
        "Recovery+Mom Top10": (RecoveryMomentumStrategy(top_n=10), data[tickers], open_tickers),
    }

    results: dict[str, pd.Series] = {}
    for name, (strategy, strat_data, strat_open) in strategies.items():
        print(f"\nRunning: {name}")
        results[name] = backtest(
            strategy,
            strat_data,
            initial_capital=initial_capital,
            fixed_fee=args.fixed_fee,
            fee_base=args.fee_base,
            fee_per_share=args.fee_per_share,
            open_data=strat_open,
        )

    # Add benchmark (normalized to same initial capital)
    # NOTE(review): if the PIT mask blanks the benchmark column entirely,
    # `bench` is empty and `bench.iloc[0]` raises — confirm mask_prices
    # leaves the benchmark untouched.
    bench = data[benchmark].dropna()
    results[benchmark_label] = (bench / bench.iloc[0]) * initial_capital
    results_df = pd.DataFrame(results)

    # --- Performance metrics ---
    for name, equity in results_df.items():
        eq = equity.dropna()
        if len(eq) < 2:
            print(f"\n  {name}: insufficient data, skipping")
            continue
        metrics.summary(eq, name=name)

    if args.attribution:
        summary_df, loadings_df = factor_attribution.attribute_strategies(
            results_df=results_df,
            benchmark_label=benchmark_label,
            benchmark=benchmark,
            price_data=data,
            market=args.market,
            model_selection=args.attribution_model,
        )
        factor_attribution.print_attribution_summary(summary_df)
        if args.attribution_export:
            factor_attribution.export_attribution(summary_df, loadings_df, args.attribution_export)
            print(f"Attribution CSVs written to {args.attribution_export}")

    # --- Visualization ---
    if not args.no_plot:
        # Rows where any curve is missing are dropped so all lines share the same dates.
        plot_results(results_df.dropna())
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()