Files
quant/research/strategy_sweep.py

146 lines
6.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Unified 3/5/10-year PIT backtest for every production strategy.
Runs the full strategy roster against the point-in-time S&P 500 price matrix
from research/pit_backtest and reports CAGR / Sharpe / Sortino / MaxDD / Calmar
for three trailing windows. Results are written to data/sweep_<years>y.csv and
printed to stdout.
Usage:
uv run python -m research.strategy_sweep
"""
import os
import pandas as pd
import research.pit_backtest as pit
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.factor_combo import SIGNAL_REGISTRY, FactorComboStrategy
from strategies.inverse_vol import InverseVolatilityStrategy
from strategies.mean_reversion import MeanReversionStrategy
from strategies.momentum import MomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
# Relative directory that main() joins with "sweep_<years>y.csv" to build the
# per-window output paths.
DATA_DIR = "data"
# Benchmark column name: priced as the buy-and-hold baseline in run_one(),
# passed to MultiFactorStrategy as `benchmark`, and excluded from the ticker
# list in main().
BENCHMARK = "SPY"
def build_strategies(tickers: list[str]) -> dict:
    """Create the roster of strategies to sweep.

    Args:
        tickers: Universe of ticker symbols. Its length sets the default
            portfolio breadth, and the list itself is handed to
            MultiFactorStrategy.

    Returns:
        Mapping of display name to strategy instance, in insertion order.
        The "SPY buy-and-hold" entry maps to None; run_one() treats None as
        the benchmark baseline rather than a strategy to backtest.
    """
    # Portfolio breadth: a tenth of the universe, never fewer than five names.
    universe_size = len(tickers)
    top_n = max(5, universe_size // 10)

    roster: dict = {}

    # --- Baselines ---
    roster["SPY buy-and-hold"] = None  # handled separately
    roster["Momentum"] = MomentumStrategy(lookback=252, skip=21, top_n=top_n)
    roster["Inverse Volatility"] = InverseVolatilityStrategy(vol_window=20)
    roster["Multi-Factor"] = MultiFactorStrategy(
        tickers=tickers, benchmark=BENCHMARK, top_n=top_n
    )
    roster["Mean Reversion"] = MeanReversionStrategy(top_n=top_n)
    roster["Trend Following"] = TrendFollowingStrategy(
        ma_window=150, momentum_period=126, top_n=top_n
    )
    roster["Dual Momentum"] = DualMomentumStrategy(top_n=top_n)
    roster["Momentum+Quality"] = MomentumQualityStrategy(
        momentum_period=252, skip=21, top_n=top_n
    )
    roster["Mom+InvVol"] = AdaptiveMomentumStrategy(top_n=top_n)
    # The "Top20" variant is capped at 20 names but shrinks with top_n on
    # small universes; the "Top10" variant is always exactly 10.
    roster["Recovery+Mom Top20"] = RecoveryMomentumStrategy(top_n=min(20, top_n))
    roster["Recovery+Mom Top10"] = RecoveryMomentumStrategy(top_n=10)

    # One factor-combo entry per registered signal, monthly rebalance only.
    # Biweekly is the other interesting cadence, but monthly aligns with how
    # the RecoveryMomentum defaults are set.
    for signal in SIGNAL_REGISTRY:
        # Replace '+' and '×' so the key is plain ASCII-friendly text.
        safe_signal = signal.replace('+', '_').replace('×', 'x')
        roster[f"fc_{safe_signal}_monthly"] = FactorComboStrategy(
            signal, rebal_freq=21, top_n=10
        )

    return roster
def slice_years(prices: pd.DataFrame, years: int) -> pd.DataFrame:
    """Return the trailing `years`-year window of a price frame.

    Args:
        prices: Frame indexed by date; the last index entry is taken as the
            end of the window.
        years: Length of the window in calendar years.

    Returns:
        The rows of `prices` whose index is on or after the last index value
        minus `years` calendar years (the boundary date is included).
    """
    window_end = prices.index[-1]
    window_start = window_end - pd.DateOffset(years=years)
    in_window = prices.index >= window_start
    return prices.loc[in_window]
def run_one(name: str, strat, prices: pd.DataFrame,
            tickers: list[str]) -> dict:
    """Produce one result row for a single strategy.

    Args:
        name: Display name; stored under the "strategy" key and forwarded to
            pit.summarize.
        strat: Strategy instance, or None for the benchmark buy-and-hold
            baseline.
        prices: Price frame for the window being evaluated.
        tickers: Columns of `prices` that form the tradable universe.

    Returns:
        A dict with "strategy" first, followed by every entry of
        pit.summarize's result except its "name" key.
    """
    if strat is not None:
        # MultiFactor needs the benchmark column, so it receives the full
        # frame; every other strategy only sees the ticker columns.
        if isinstance(strat, MultiFactorStrategy):
            universe = prices
        else:
            universe = prices[tickers]
        equity = pit.backtest(strategy=strat, prices=universe,
                              initial_capital=10_000, transaction_cost=0.001)
    else:
        # Buy-and-hold baseline: benchmark prices rebased to the same 10,000
        # starting capital used for the backtests.
        benchmark_px = prices[BENCHMARK].dropna()
        equity = (benchmark_px / benchmark_px.iloc[0]) * 10_000

    row = {"strategy": name}
    for key, value in pit.summarize(equity, name=name).items():
        # The summary's own "name" entry is dropped in favour of "strategy".
        if key != "name":
            row[key] = value
    return row
def fmt(row: dict) -> str:
    """Render one result row as a single aligned report line.

    Args:
        row: Mapping with "strategy", "CAGR", "Sharpe", "Sortino", "MaxDD"
            and "Calmar" entries. CAGR and MaxDD are fractions and are
            printed as percentages.

    Returns:
        The formatted line: the strategy name left-justified to 44 columns,
        followed by the five labelled, right-aligned metrics.
    """
    segments = [
        f" {row['strategy']:<44s} ",
        f"CAGR={row['CAGR']*100:>6.1f}% ",
        f"Sharpe={row['Sharpe']:>5.2f} ",
        f"Sortino={row['Sortino']:>5.2f} ",
        f"MaxDD={row['MaxDD']*100:>6.1f}% ",
        f"Calmar={row['Calmar']:>5.2f}",
    ]
    return "".join(segments)
def main() -> None:
    """Run the sweep over the 10/5/3-year windows, then print and save results.

    For each window the full strategy roster is evaluated, the rows are
    sorted by Sharpe (best first), printed, and written to
    ``<DATA_DIR>/sweep_<years>y.csv``. A strategy that raises is reported as
    ``[skip]`` and left out of that window. A final table compares CAGR
    across windows for the strategies that produced a result in every window.
    """
    windows = (10, 5, 3)
    rule = "=" * 110

    print("Loading point-in-time price data…")
    raw = pit.load_pit_prices()
    masked = pit.pit_universe(raw)
    # Preserve SPY even though it's not in the membership intervals.
    if BENCHMARK in raw.columns:
        masked[BENCHMARK] = raw[BENCHMARK]
    tickers = [c for c in masked.columns if c != BENCHMARK]
    print(f" tickers={len(tickers)} rows={len(masked)} "
          f"range={masked.index[0].date()} → {masked.index[-1].date()}")

    # Create the output directory up front so a missing folder cannot throw
    # away a window's worth of finished backtests at to_csv() time.
    os.makedirs(DATA_DIR, exist_ok=True)

    all_results: dict[int, pd.DataFrame] = {}
    for years in windows:
        sliced = slice_years(masked, years)
        # The roster is rebuilt for every window, so each window runs on
        # freshly constructed strategy instances.
        strategies = build_strategies(tickers)
        print("\n" + rule)
        print(f"Window = last {years} years "
              f"({sliced.index[0].date()} → {sliced.index[-1].date()})")
        print(rule)

        rows = []
        for name, strat in strategies.items():
            try:
                rows.append(run_one(name, strat, sliced, tickers))
            except Exception as exc:  # noqa: BLE001
                # Deliberate best-effort: one failing strategy must not
                # abort the rest of the sweep.
                print(f" [skip] {name}: {type(exc).__name__}: {exc}")

        if not rows:
            # An empty frame has no "Sharpe" column to sort on.
            print(f" [skip] no strategy produced results for the {years}y window")
            continue

        df = pd.DataFrame(rows).sort_values("Sharpe", ascending=False)
        for _, r in df.iterrows():
            print(fmt(r))
        out = os.path.join(DATA_DIR, f"sweep_{years}y.csv")
        df.to_csv(out, index=False)
        all_results[years] = df
        print(f" → saved {out}")

    # Cross-window comparison: only strategies present in all windows.
    print("\n" + rule)
    print("Cross-window CAGR comparison (sorted by 10y Sharpe)")
    print(rule)
    missing = [y for y in windows if y not in all_results]
    if missing:
        print(" [skip] no results for window(s): "
              + ", ".join(f"{y}y" for y in missing))
        return

    # join="inner" keeps only strategies that have a row in every window,
    # so a strategy skipped in one window does not appear as a NaN row.
    pivot = pd.concat(
        {
            f"CAGR_{y}y": all_results[y].set_index("strategy")["CAGR"]
            for y in windows
        },
        axis=1,
        join="inner",
    )
    pivot["Sharpe_10y"] = all_results[10].set_index("strategy")["Sharpe"]
    pivot = pivot.sort_values("Sharpe_10y", ascending=False)
    print(pivot.to_string(formatters={
        "CAGR_10y": "{:.1%}".format, "CAGR_5y": "{:.1%}".format,
        "CAGR_3y": "{:.1%}".format, "Sharpe_10y": "{:.2f}".format,
    }))
# Script entry point: run the sweep only when executed directly
# (e.g. `python -m research.strategy_sweep`), not when imported.
if __name__ == "__main__":
    main()