Add point-in-time S&P 500 backtest to expose survivorship bias

The existing framework fetches today's S&P 500 constituents from Wikipedia and applies that list to the entire 10-year price history — classic survivorship bias. Stocks that went bankrupt or were removed for poor performance are absent, while today's winners (which may have been minor names 10 years ago) are implicitly selected. This materially inflates reported strategy returns. New pipeline: - universe_history.py reconstructs per-ticker membership intervals by walking Wikipedia's "Selected changes" table backward from today. - research/fetch_historical.py downloads prices for all 848 tickers that were ever members (Yahoo returns ~675 of them; ~170 fully delisted names are unavailable — remaining partial bias). - research/pit_backtest.py masks prices to NaN outside membership windows so strategies naturally cannot select non-members. - research/strategies_plus.py adds RecoveryMomentumPlus (generalized Recovery+Momentum with configurable weighting / blend / regime hook) and an EnsembleStrategy. - research/optimize.py runs five experiments: bias drift, hyperparameter sweep (2016-2022 train / 2023-2026 test), SPY MA regime filter, weighting schemes, and an uncorrelated-config ensemble. Headline finding: the biased backtest reports 40.9% CAGR for recovery_mom_top10 over 2016-2026; the point-in-time version reports 22.4% (vs 14.0% SPY buy-and-hold). True edge is ~8pp CAGR, not ~27pp. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 16:26:02 +08:00
parent 2015b62104
commit 5e1c4a681d
7 changed files with 910 additions and 0 deletions
--- a/research/strategies_plus.py
+++ b/research/strategies_plus.py
@@ -0,0 +1,150 @@
+"""
+Optimization variants of RecoveryMomentumStrategy.
+
+Four dimensions explored:
+  1. Hyperparameters (top_n, recovery_window, mom_lookback, rebal_freq, weights)
+  2. Regime filter: zero-out weights when SPY < MA200
+  3. Weighting scheme: equal / inverse-vol / rank-weighted
+  4. Ensemble: weighted blend of multiple strategies
+
+All strategies follow the same Strategy protocol (generate_signals → weights DF).
+"""
+
+import numpy as np
+import pandas as pd
+
+from strategies.base import Strategy
+
+
+# ---------------------------------------------------------------------------
+# Generalized Recovery+Momentum strategy
+# ---------------------------------------------------------------------------
+
+class RecoveryMomentumPlus(Strategy):
+    """
+    Recovery + momentum composite with configurable factor blend and
+    portfolio weighting.
+
+    On every row each stock is percentile-ranked cross-sectionally on a
+    recovery factor and on a skip-adjusted momentum factor. The two ranks are
+    blended, the top_n names are weighted, and the book is refreshed every
+    rebal_freq rows. No regime filter is applied inside this class.
+
+    Parameters
+    ----------
+    recovery_window : int
+        Lookback for the recovery factor (price / rolling min - 1).
+    mom_lookback : int
+        Long-horizon momentum window total length.
+    mom_skip : int
+        Short-term reversal skip for momentum; must be < mom_lookback.
+    rebal_freq : int
+        Trading-day rebalance interval.
+    top_n : int
+        Number of stocks selected each rebalance.
+    rec_weight : float in [0, 1]
+        Weight of recovery factor in composite rank blend (mom_weight = 1 - rec_weight).
+    weighting : {"equal", "inv_vol", "rank"}
+        Portfolio weighting scheme for the selected top_n.
+    vol_window : int
+        Volatility lookback when weighting="inv_vol".
+
+    Raises
+    ------
+    ValueError
+        If `weighting` is unknown, `recovery_window`/`rebal_freq`/`top_n` is
+        < 1, `mom_skip` is not in [0, mom_lookback), or `rec_weight` is
+        outside [0, 1].
+    """
+
+    def __init__(self,
+                 recovery_window: int = 63,
+                 mom_lookback: int = 252,
+                 mom_skip: int = 21,
+                 rebal_freq: int = 21,
+                 top_n: int = 10,
+                 rec_weight: float = 0.5,
+                 weighting: str = "equal",
+                 vol_window: int = 60):
+        if weighting not in ("equal", "inv_vol", "rank"):
+            raise ValueError(f"weighting must be equal|inv_vol|rank, got {weighting!r}")
+        if min(recovery_window, rebal_freq, top_n) < 1:
+            raise ValueError("recovery_window, rebal_freq and top_n must be >= 1")
+        # A skip >= lookback would make the pct_change period zero or negative,
+        # i.e. a constant or forward-looking "momentum".
+        if not 0 <= mom_skip < mom_lookback:
+            raise ValueError("mom_skip must satisfy 0 <= mom_skip < mom_lookback")
+        if not 0.0 <= rec_weight <= 1.0:
+            raise ValueError(f"rec_weight must be in [0, 1], got {rec_weight!r}")
+        self.recovery_window = recovery_window
+        self.mom_lookback = mom_lookback
+        self.mom_skip = mom_skip
+        self.rebal_freq = rebal_freq
+        self.top_n = top_n
+        self.rec_weight = rec_weight
+        self.weighting = weighting
+        self.vol_window = vol_window
+
+    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Build the target-weight matrix for `data`.
+
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Price matrix; presumably rows are trading days and columns are
+            tickers, with NaN where a ticker is unavailable (TODO confirm
+            against the caller).
+
+        Returns
+        -------
+        pd.DataFrame
+            Same index/columns as `data`. Each row is either all zeros or
+            non-negative weights summing to 1. Weights computed on row t are
+            shifted to take effect on row t + 1.
+        """
+        # Factors. fill_method=None keeps pct_change from forward-filling NaN
+        # prices, so gaps yield NaN factors instead of stale returns.
+        recovery = data / data.rolling(self.recovery_window).min() - 1
+        momentum = data.shift(self.mom_skip).pct_change(
+            self.mom_lookback - self.mom_skip, fill_method=None
+        )
+
+        rec_rank = recovery.rank(axis=1, pct=True, na_option="keep")
+        mom_rank = momentum.rank(axis=1, pct=True, na_option="keep")
+        composite = self.rec_weight * rec_rank + (1 - self.rec_weight) * mom_rank
+
+        # Top-N selection. method="first" breaks ties by column order so that
+        # exactly top_n names are chosen; average ranks can straddle the
+        # cutoff and select more or fewer.
+        rank = composite.rank(axis=1, method="first", ascending=False,
+                              na_option="bottom")
+        # Rows with fewer than top_n scored names select nothing at all.
+        enough = composite.notna().sum(axis=1) >= self.top_n
+        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)
+
+        # Weighting within top-N
+        if self.weighting == "equal":
+            raw = top_mask.astype(float)
+        elif self.weighting == "rank":
+            # Higher composite → higher weight within top-N
+            raw = composite.where(top_mask, 0.0)
+        else:
+            # "inv_vol": inverse realized volatility within top-N. Names with
+            # undefined or zero volatility receive zero weight.
+            rets = data.pct_change(fill_method=None)
+            vol = rets.rolling(self.vol_window).std()
+            inv_vol = 1.0 / vol.replace(0, np.nan)
+            raw = inv_vol.where(top_mask, 0.0).fillna(0.0)
+
+        # Normalize each row to sum to 1; rows with no selection stay all-zero.
+        row_sums = raw.sum(axis=1).replace(0, np.nan)
+        signals = raw.div(row_sums, axis=0).fillna(0.0)
+
+        # Rebalance: keep weights only on every rebal_freq-th row starting at
+        # `warmup`, then carry them forward. Rows before the first rebalance
+        # have nothing to carry forward and end up as zeros.
+        warmup = max(self.mom_lookback, self.recovery_window, self.vol_window)
+        is_rebal = np.zeros(len(data), dtype=bool)
+        is_rebal[warmup::self.rebal_freq] = True
+        signals.iloc[~is_rebal] = np.nan
+        signals = signals.ffill().fillna(0.0)
+
+        # Shift one row so a decision made on row t is applied from row t + 1.
+        return signals.shift(1).fillna(0.0)
+
+
+# ---------------------------------------------------------------------------
+# Ensemble
+# ---------------------------------------------------------------------------
+
+class EnsembleStrategy(Strategy):
+    """
+    Weighted blend of several sub-strategies. Each sub-strategy produces a
+    weight matrix; the matrices are linearly combined using the component
+    weights, which are rescaled to sum to 1. With non-negative component
+    weights, each blended row still sums to (at most) 1 if every
+    sub-strategy's rows do.
+
+    Parameters
+    ----------
+    components : list of (Strategy, float)
+        Sub-strategies paired with their relative blend weights.
+
+    Raises
+    ------
+    ValueError
+        If `components` is empty or its weights sum to zero.
+    """
+
+    def __init__(self, components: list[tuple[Strategy, float]]):
+        components = list(components)
+        total = sum(w for _, w in components)
+        # Fail fast: an empty blend would make generate_signals return None,
+        # and a zero total cannot be rescaled.
+        if not components or total == 0:
+            raise ValueError(
+                "components must be non-empty with a non-zero total weight"
+            )
+        self.components = [(s, w / total) for s, w in components]
+
+    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
+        """Return the weighted sum of each component's signal matrix for `data`."""
+        out = None
+        for strat, w in self.components:
+            sig = strat.generate_signals(data).mul(w)
+            # Columns should be identical since the same data is passed;
+            # fill_value=0.0 makes a cell missing from one frame count as 0.
+            out = sig if out is None else out.add(sig, fill_value=0.0)
+        return out
+
+
+# ---------------------------------------------------------------------------
+# Regime filter helper
+# ---------------------------------------------------------------------------
+
+def spy_ma200_filter(spy: pd.Series, ma_window: int = 200) -> pd.Series:
+    """
+    Boolean Series: True when SPY close > SPY MA(ma_window), shifted by 1 to
+    avoid lookahead. Use as `regime_filter=...` in pit_backtest.backtest().
+
+    The first row and every row whose moving average is still warming up are
+    False. The result always has bool dtype.
+    """
+    ma = spy.rolling(ma_window, min_periods=ma_window).mean()
+    # Comparing against a NaN average already yields False, so no fillna is
+    # needed. fill_value=False keeps the shifted Series bool instead of
+    # upcasting to object, where `~mask` would give integers.
+    return (spy > ma).shift(1, fill_value=False)