Add point-in-time S&P 500 backtest to expose survivorship bias

The existing framework fetches today's S&P 500 constituents from Wikipedia
and applies that list to the entire 10-year price history — classic
survivorship bias. Stocks that went bankrupt or were removed for poor
performance are absent, while today's winners (which may have been minor
names 10 years ago) are implicitly selected. This materially inflates
reported strategy returns.

New pipeline:
  - universe_history.py reconstructs per-ticker membership intervals by
    walking Wikipedia's "Selected changes" table backward from today.
  - research/fetch_historical.py downloads prices for all 848 tickers
    that were ever members (Yahoo returns ~675 of them; ~170 fully
    delisted names are unavailable — remaining partial bias).
  - research/pit_backtest.py masks prices to NaN outside membership
    windows so strategies naturally cannot select non-members.
  - research/strategies_plus.py adds RecoveryMomentumPlus (generalized
    Recovery+Momentum with configurable weighting / blend / regime hook)
    and an EnsembleStrategy.
  - research/optimize.py runs five experiments: bias drift, hyperparameter
    sweep (2016-2022 train / 2023-2026 test), SPY MA regime filter,
    weighting schemes, and an uncorrelated-config ensemble.

Headline finding: the biased backtest reports 40.9% CAGR for
recovery_mom_top10 over 2016-2026; the point-in-time version reports
22.4% (vs 14.0% SPY buy-and-hold). True edge is ~8pp CAGR, not ~27pp.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 16:26:02 +08:00
parent 2015b62104
commit 5e1c4a681d
7 changed files with 910 additions and 0 deletions

150
research/strategies_plus.py Normal file
View File

@@ -0,0 +1,150 @@
"""
Optimization variants of RecoveryMomentumStrategy.
Four dimensions explored:
1. Hyperparameters (top_n, recovery_window, mom_lookback, rebal_freq, weights)
2. Regime filter: zero-out weights when SPY < MA200
3. Weighting scheme: equal / inverse-vol / rank-weighted
4. Ensemble: weighted blend of multiple strategies
All strategies follow the same Strategy protocol (generate_signals → weights DF).
"""
import numpy as np
import pandas as pd
from strategies.base import Strategy
# ---------------------------------------------------------------------------
# Generalized Recovery+Momentum strategy
# ---------------------------------------------------------------------------
class RecoveryMomentumPlus(Strategy):
    """
    Recovery + momentum composite with configurable blend and weighting.

    Each row of the price frame is scored by blending two cross-sectional
    percentile ranks: the recovery factor (price / rolling min - 1) and a
    skip-adjusted momentum return. The ``top_n`` highest composite scores are
    held, weights are refreshed every ``rebal_freq`` rows after a warm-up
    period, and the final weight matrix is delayed by one row so that a
    weight never depends on the price of the row it is applied to.

    Parameters
    ----------
    recovery_window : int
        Lookback for the recovery factor (price / rolling min - 1).
    mom_lookback : int
        Long-horizon momentum window total length.
    mom_skip : int
        Short-term reversal skip for momentum. Must be smaller than
        ``mom_lookback``; otherwise the momentum period would be zero or
        negative, and a negative period reads future prices.
    rebal_freq : int
        Trading-day rebalance interval.
    top_n : int
        Number of stocks selected each rebalance.
    rec_weight : float in [0, 1]
        Weight of recovery factor in composite rank blend
        (mom_weight = 1 - rec_weight).
    weighting : {"equal", "inv_vol", "rank"}
        Portfolio weighting scheme for the selected top_n.
    vol_window : int
        Volatility lookback when weighting="inv_vol".

    Raises
    ------
    ValueError
        If ``weighting`` is not one of the supported schemes, or if any
        numeric parameter is outside the range described above.
    """

    _WEIGHTINGS = ("equal", "inv_vol", "rank")

    def __init__(self,
                 recovery_window: int = 63,
                 mom_lookback: int = 252,
                 mom_skip: int = 21,
                 rebal_freq: int = 21,
                 top_n: int = 10,
                 rec_weight: float = 0.5,
                 weighting: str = "equal",
                 vol_window: int = 60):
        if weighting not in self._WEIGHTINGS:
            raise ValueError(f"weighting must be equal|inv_vol|rank, got {weighting!r}")
        if mom_skip < 0 or mom_lookback <= mom_skip:
            # A non-positive pct_change period would either be degenerate (0)
            # or look into the future (negative).
            raise ValueError(
                "need 0 <= mom_skip < mom_lookback, got "
                f"mom_skip={mom_skip!r}, mom_lookback={mom_lookback!r}"
            )
        positive_ints = {
            "recovery_window": recovery_window,
            "rebal_freq": rebal_freq,
            "top_n": top_n,
            "vol_window": vol_window,
        }
        for name, value in positive_ints.items():
            if value < 1:
                raise ValueError(f"{name} must be >= 1, got {value!r}")
        if not 0.0 <= rec_weight <= 1.0:
            raise ValueError(f"rec_weight must be in [0, 1], got {rec_weight!r}")

        self.recovery_window = recovery_window
        self.mom_lookback = mom_lookback
        self.mom_skip = mom_skip
        self.rebal_freq = rebal_freq
        self.top_n = top_n
        self.rec_weight = rec_weight
        self.weighting = weighting
        self.vol_window = vol_window

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Build the portfolio weight matrix for a price frame.

        Parameters
        ----------
        data : pd.DataFrame
            Prices, one row per date and one column per ticker. NaN cells are
            treated as "not available": such a ticker gets a NaN composite
            score and cannot enter the top-N on that row.

        Returns
        -------
        pd.DataFrame
            Same shape as ``data``. Each row is either all zeros or sums to 1.
            Weights are delayed by one row relative to the prices that
            produced them.
        """
        # --- Factors -------------------------------------------------------
        recovery = data / data.rolling(self.recovery_window).min() - 1
        # fill_method=None: never pad over NaN gaps, so a stale price from
        # before a gap cannot leak into the momentum figure.
        momentum = data.shift(self.mom_skip).pct_change(
            self.mom_lookback - self.mom_skip, fill_method=None
        )
        rec_rank = recovery.rank(axis=1, pct=True, na_option="keep")
        mom_rank = momentum.rank(axis=1, pct=True, na_option="keep")
        composite = self.rec_weight * rec_rank + (1 - self.rec_weight) * mom_rank

        # --- Top-N selection -----------------------------------------------
        # method="first" breaks ties by column order so exactly top_n names
        # are picked; average ranks would give fractional ranks at the cutoff
        # and select more or fewer than top_n.
        order = composite.rank(
            axis=1, method="first", ascending=False, na_option="bottom"
        )
        # Rows with fewer than top_n scored names stay entirely in cash.
        enough = composite.notna().sum(axis=1) >= self.top_n
        top_mask = (order <= self.top_n) & enough.to_numpy().reshape(-1, 1)

        # --- Weighting within top-N ----------------------------------------
        raw = self._raw_weights(data, composite, top_mask)
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

        # --- Rebalance -----------------------------------------------------
        # Keep weights only on rebalance rows and carry them forward between
        # rebalances; everything before the warm-up point is forced to zero.
        warmup = max(self.mom_lookback, self.recovery_window, self.vol_window)
        is_rebal = np.zeros(len(data), dtype=bool)
        is_rebal[warmup::self.rebal_freq] = True
        signals.loc[~is_rebal] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0

        # One-row delay: weights computed from row t apply from row t + 1.
        return signals.shift(1).fillna(0.0)

    def _raw_weights(self,
                     data: pd.DataFrame,
                     composite: pd.DataFrame,
                     top_mask: pd.DataFrame) -> pd.DataFrame:
        """Return un-normalised weights (zero outside ``top_mask``)."""
        if self.weighting == "equal":
            return top_mask.astype(float)
        if self.weighting == "rank":
            # Higher composite score -> proportionally higher weight.
            return composite.where(top_mask, 0.0)
        # "inv_vol": inverse realised volatility of simple returns. Returns
        # are not padded across NaN gaps, and zero volatility is treated as
        # missing so it cannot produce an infinite weight.
        rets = data.pct_change(fill_method=None)
        vol = rets.rolling(self.vol_window).std()
        inv_vol = 1.0 / vol.replace(0, np.nan)
        return inv_vol.where(top_mask, 0.0).fillna(0.0)
# ---------------------------------------------------------------------------
# Ensemble
# ---------------------------------------------------------------------------
class EnsembleStrategy(Strategy):
    """
    Weighted blend of several sub-strategies.

    Each sub-strategy produces a weight matrix; the matrices are combined
    linearly using the component weights, which are normalised to sum to 1 at
    construction. Because component weights are required to be non-negative,
    the blend sums to at most 1 per row whenever every sub-strategy's own
    output does.

    Parameters
    ----------
    components : list[tuple[Strategy, float]]
        ``(strategy, weight)`` pairs. Any iterable is accepted; it is
        materialised once. An empty collection is allowed and yields an
        all-zero weight matrix.

    Raises
    ------
    ValueError
        If any component weight is negative, or if the weights of a non-empty
        ensemble do not sum to a positive number.
    """

    def __init__(self, components: list[tuple[Strategy, float]]):
        # Materialise first: the pairs are traversed more than once below, so
        # a one-shot iterator would otherwise leave the ensemble empty.
        components = list(components)
        if any(w < 0 for _, w in components):
            raise ValueError("ensemble component weights must be non-negative")
        total = sum(w for _, w in components)
        if components and not total > 0:
            raise ValueError(
                f"ensemble component weights must sum to a positive value, got {total!r}"
            )
        self.components = [(s, w / total) for s, w in components]

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Blend the sub-strategies' weight matrices for ``data``.

        Returns
        -------
        pd.DataFrame
            Sum of each sub-strategy's output scaled by its normalised
            weight. With no components, an all-zero frame shaped like
            ``data``.
        """
        scaled = [strat.generate_signals(data).mul(w) for strat, w in self.components]
        if not scaled:
            return pd.DataFrame(0.0, index=data.index, columns=data.columns)
        blended = scaled[0]
        for sig in scaled[1:]:
            # Labels should be identical since the same data is passed to
            # every component; fill_value guards against a component that
            # returns only a subset of them.
            blended = blended.add(sig, fill_value=0.0)
        return blended
# ---------------------------------------------------------------------------
# Regime filter helper
# ---------------------------------------------------------------------------
def spy_ma200_filter(spy: pd.Series, ma_window: int = 200) -> pd.Series:
    """
    Boolean Series: True when SPY close > SPY MA(ma_window), shifted by 1 to
    avoid lookahead. Use as `regime_filter=...` in pit_backtest.backtest().

    Parameters
    ----------
    spy : pd.Series
        SPY close prices.
    ma_window : int
        Moving-average length. The average is undefined (and the filter
        False) until ``ma_window`` observations are available.

    Returns
    -------
    pd.Series
        Bool-dtype Series on the same index as ``spy``. The first element is
        always False because there is no prior row to read from.
    """
    ma = spy.rolling(ma_window, min_periods=ma_window).mean()
    above = (spy > ma).fillna(False)
    # Shift with an explicit fill so the Series stays bool; a plain shift(1)
    # inserts NaN, which upcasts to object dtype and relies on fillna
    # downcasting to get back to bool.
    return above.shift(1, fill_value=False).astype(bool)