""" PIT-compliant backtest: mask prices to historical S&P 500 membership. Compares: 1. BIASED: current S&P 500 constituents applied back to 2016 (what we had before) 2. PIT: historical membership mask — each date only sees stocks that were actually S&P 500 members on that date This isolates the survivorship bias in our previous results. """ from __future__ import annotations import os, sys import numpy as np import pandas as pd sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from strategies.ensemble_alpha import SharpeBoostedEnsembleStrategy import universe_history as uh from research.pit_backtest import load_pit_prices, pit_universe def compute_metrics(daily_rets: pd.Series) -> dict: eq = (1 + daily_rets).cumprod() n_years = len(daily_rets) / 252.0 cagr = eq.iloc[-1] ** (1.0 / n_years) - 1.0 vol = daily_rets.std() * np.sqrt(252) sharpe = daily_rets.mean() / daily_rets.std() * np.sqrt(252) if daily_rets.std() > 0 else 0 running_max = eq.cummax() dd = eq / running_max - 1 max_dd = dd.min() calmar = cagr / abs(max_dd) if max_dd != 0 else 0 return {"cagr": cagr, "vol": vol, "sharpe": sharpe, "max_dd": max_dd, "calmar": calmar} def yearly_returns(daily_rets: pd.Series) -> pd.Series: eq = (1 + daily_rets).cumprod() yearly = eq.resample("YE").last().pct_change() yearly.iloc[0] = eq.resample("YE").last().iloc[0] - 1 yearly.index = yearly.index.year return yearly def run_strategy(data: pd.DataFrame, start="2016-10-01", end="2026-05-13"): """Run SharpeBoostedEnsembleStrategy on given price data.""" strat = SharpeBoostedEnsembleStrategy() weights = strat.generate_signals(data) daily_rets = (weights * data.pct_change().fillna(0.0)).sum(axis=1) return daily_rets.loc[start:end] def main(): print("=" * 90) print("SURVIVORSHIP BIAS TEST: PIT Membership vs Current Constituents") print("=" * 90) # --- Load PIT prices (includes delisted stocks) --- print("\n--- Loading PIT price data ---") pit_prices_raw = load_pit_prices() print(f" Raw PIT prices: {pit_prices_raw.shape}") # --- Apply PIT membership mask --- print("\n--- Applying PIT membership mask ---") intervals = uh.load_sp500_history() pit_prices = pit_universe(pit_prices_raw) print(f" PIT-masked prices: {pit_prices.shape}") # Show how many stocks are available at various dates for d in ["2016-12-30", "2018-12-31", "2020-12-31", "2022-12-30", "2024-12-31"]: if d in pit_prices.index.strftime("%Y-%m-%d").tolist(): n_avail = pit_prices.loc[d].notna().sum() print(f" {d}: {n_avail} stocks available") else: # Find nearest date idx = pit_prices.index.get_indexer([pd.Timestamp(d)], method="nearest") actual = pit_prices.index[idx[0]] n_avail = pit_prices.loc[actual].notna().sum() print(f" {actual.strftime('%Y-%m-%d')}: {n_avail} stocks available") # --- Create biased version: use all stocks in us_pit (no mask) --- # This simulates "using today's S&P 500 back in 2016" biased_prices = pit_prices_raw.copy() print(f"\n Biased (no mask) prices: {biased_prices.shape}") # --- Run strategy on both --- # Use start=2016-10-01 because PIT data starts 2016-04-19 and we need # 252 days of warmup start = "2017-06-01" # ~252 trading days after 2016-04-19 end = "2026-05-13" print(f"\n--- Running strategy ({start} to {end}) ---") print(" Running on PIT-masked data...") pit_rets = run_strategy(pit_prices, start=start, end=end) pit_m = compute_metrics(pit_rets) print(" Running on biased data (no mask)...") biased_rets = run_strategy(biased_prices, start=start, end=end) biased_m = compute_metrics(biased_rets) # --- Also compare with SPY --- spy_rets = pit_prices_raw["SPY"].pct_change().fillna(0.0).loc[start:end] spy_m = compute_metrics(spy_rets) # --- Results --- print(f"\n{'=' * 90}") print("RESULTS COMPARISON") print(f"{'=' * 90}") print(f"{'Metric':<12s} {'PIT (correct)':>16s} {'Biased (no mask)':>18s} {'SPY':>12s}") print("-" * 60) for metric, fmt in [("cagr", "{:.1f}%"), ("vol", "{:.1f}%"), ("sharpe", "{:.2f}"), ("max_dd", "{:.1f}%"), ("calmar", "{:.2f}")]: scale = 100 if "%" in fmt else 1 pit_val = pit_m[metric] * scale biased_val = biased_m[metric] * scale spy_val = spy_m[metric] * scale print(f" {metric:<12s} {fmt.format(pit_val):>16s} {fmt.format(biased_val):>18s} {fmt.format(spy_val):>12s}") # --- Yearly comparison --- print(f"\n{'=' * 90}") print("YEARLY RETURNS") print(f"{'=' * 90}") pit_yr = yearly_returns(pit_rets) biased_yr = yearly_returns(biased_rets) spy_yr = yearly_returns(spy_rets) print(f" {'Year':>4s} {'PIT':>10s} {'Biased':>10s} {'Delta':>10s} {'SPY':>10s}") print(f" {'-'*50}") for year in sorted(set(pit_yr.index) | set(biased_yr.index)): p = pit_yr.get(year, float("nan")) b = biased_yr.get(year, float("nan")) s = spy_yr.get(year, float("nan")) delta = p - b if not (np.isnan(p) or np.isnan(b)) else float("nan") print(f" {year:>4d} {p*100:>+9.1f}% {b*100:>+9.1f}% {delta*100:>+9.1f}pp {s*100:>+9.1f}%") # --- Analyze which stocks are affected --- print(f"\n{'=' * 90}") print("SURVIVORSHIP BIAS ANALYSIS") print(f"{'=' * 90}") # Find stocks that are NOT in current S&P 500 but WERE members historically from universe import get_sp500 current_sp500 = set(get_sp500()) # Stocks removed from S&P 500 during our backtest period (2016-2026) removed_during = [] added_during = [] for ticker, ivs in intervals.items(): for start_d, end_d in ivs: if end_d and "2016" <= end_d <= "2026": removed_during.append((ticker, end_d)) if start_d and "2016" <= start_d <= "2026": added_during.append((ticker, start_d)) removed_during.sort(key=lambda x: x[1]) added_during.sort(key=lambda x: x[1]) print(f"\n Stocks REMOVED from S&P 500 during 2016-2026: {len(removed_during)}") print(f" Stocks ADDED to S&P 500 during 2016-2026: {len(added_during)}") print(f"\n Most impactful removals (stocks that biased backtest would wrongly exclude):") # Check which removed stocks had price data and what happened to them removed_with_prices = [] for ticker, remove_date in removed_during: if ticker in pit_prices_raw.columns: # What was their return from when they were removed? try: remove_ts = pd.Timestamp(remove_date) pre = pit_prices_raw.loc[:remove_ts, ticker].dropna() if len(pre) > 63: # Get 3-month return before removal ret_3m = pre.iloc[-1] / pre.iloc[-63] - 1 if len(pre) > 63 else np.nan removed_with_prices.append((ticker, remove_date, ret_3m)) except Exception: pass removed_with_prices.sort(key=lambda x: x[2] if not np.isnan(x[2]) else 0) print(f" {'Ticker':<8s} {'Removed':>12s} {'3m ret before':>14s} {'Impact'}") for ticker, rd, ret in removed_with_prices[:15]: impact = "Would have been selected (recovery signal)" if ret < -0.20 else "Neutral" print(f" {ticker:<8s} {rd:>12s} {ret*100:>+13.1f}% {impact}") print(f"\n Notable ADDITIONS (stocks biased backtest wrongly includes early):") # Key stocks that were added during our period notable_adds = [(t, d) for t, d in added_during if t in ["TSLA", "MRNA", "CVNA", "PLTR", "APP", "SMCI", "AXON", "SATS"]] for ticker, add_date in notable_adds: print(f" {ticker:<8s} added {add_date} — biased backtest selects it BEFORE this date!") # --- Check: did we select any non-member stocks in PIT backtest? --- print(f"\n{'=' * 90}") print("PIT AUDIT: Verify no look-ahead in PIT backtest") print(f"{'=' * 90}") strat = SharpeBoostedEnsembleStrategy() pit_weights = strat.generate_signals(pit_prices) # For each date, check that all non-zero weight stocks are S&P 500 members mask = uh.membership_mask(pit_prices.index, intervals, list(pit_prices.columns)) violations = 0 for date in pit_weights.index: active = pit_weights.loc[date] active_tickers = active[active > 0.001].index.tolist() for t in active_tickers: if t in mask.columns and not mask.loc[date, t]: violations += 1 if violations <= 5: print(f" VIOLATION: {t} selected on {date.strftime('%Y-%m-%d')} but NOT a member!") if violations == 0: print(" NO VIOLATIONS: All selected stocks were S&P 500 members on their selection date.") else: print(f" Total violations: {violations}") # --- Bootstrap on PIT returns --- print(f"\n{'=' * 90}") print("BOOTSTRAP: PIT-corrected returns") print(f"{'=' * 90}") from research.trend_rider_p0 import block_bootstrap boot = block_bootstrap(pit_rets, n_boot=5000, block_len=42) print(f" Sharpe: median={boot['sharpe'].median():.2f} " f"5th={boot['sharpe'].quantile(0.05):.2f} " f"95th={boot['sharpe'].quantile(0.95):.2f}") print(f" CAGR: median={boot['cagr'].median()*100:.1f}% " f"5th={boot['cagr'].quantile(0.05)*100:.1f}% " f"95th={boot['cagr'].quantile(0.95)*100:.1f}%") print(f" MaxDD: median={boot['max_drawdown'].median()*100:.1f}% " f"5th={boot['max_drawdown'].quantile(0.05)*100:.1f}% " f"95th={boot['max_drawdown'].quantile(0.95)*100:.1f}%") print(f" P(Sharpe > 1.5): {(boot['sharpe'] > 1.5).mean()*100:.1f}%") print(f" P(Sharpe > 1.0): {(boot['sharpe'] > 1.0).mean()*100:.1f}%") if __name__ == "__main__": main()