# quant/research/pit_comparison.py

"""
PIT-compliant backtest: mask prices to historical S&P 500 membership.

Compares:
1. BIASED: current S&P 500 constituents applied back to 2016 (what we had before)
2. PIT: historical membership mask — each date only sees stocks that were
   actually S&P 500 members on that date

This isolates the survivorship bias in our previous results.
"""
from __future__ import annotations
import os, sys
import numpy as np
import pandas as pd

# Put the parent of this file's directory at the front of sys.path —
# presumably so the project-local imports below (strategies, universe_history,
# research) resolve when this file is executed directly as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from strategies.ensemble_alpha import SharpeBoostedEnsembleStrategy
import universe_history as uh
from research.pit_backtest import load_pit_prices, pit_universe


def compute_metrics(daily_rets: pd.Series) -> dict:
    """Summarise a series of daily simple returns.

    Annualisation uses 252 periods per year throughout.

    Args:
        daily_rets: Per-period simple returns (0.01 means +1%).

    Returns:
        A dict with keys:
          * ``cagr``   - compounded annual growth rate of the equity curve.
          * ``vol``    - annualised standard deviation of the returns.
          * ``sharpe`` - annualised mean/std ratio with no risk-free rate
            subtracted; 0 when the standard deviation is not positive.
          * ``max_dd`` - most negative peak-to-trough drawdown (<= 0).
          * ``calmar`` - ``cagr / |max_dd|``; 0 when there is no drawdown.

        An empty input describes a flat equity curve, so every metric is 0.0.
    """
    if daily_rets.empty:
        # No observations: equity never leaves its starting value of 1.0.
        return {"cagr": 0.0, "vol": 0.0, "sharpe": 0.0, "max_dd": 0.0, "calmar": 0.0}

    equity = (1 + daily_rets).cumprod()
    n_years = len(daily_rets) / 252.0
    cagr = equity.iloc[-1] ** (1.0 / n_years) - 1.0

    daily_std = daily_rets.std()
    annualiser = np.sqrt(252)
    vol = daily_std * annualiser
    # A NaN std (single observation) also fails the ``> 0`` test and yields 0.
    sharpe = daily_rets.mean() / daily_std * annualiser if daily_std > 0 else 0

    # The curve implicitly starts at 1.0 before the first return, so the
    # running peak can never be below 1.0. Without the floor, a loss on the
    # very first days would not register as a drawdown at all.
    running_peak = equity.cummax().clip(lower=1.0)
    max_dd = (equity / running_peak - 1).min()
    calmar = cagr / abs(max_dd) if max_dd != 0 else 0

    return {"cagr": cagr, "vol": vol, "sharpe": sharpe, "max_dd": max_dd, "calmar": calmar}


def yearly_returns(daily_rets: pd.Series) -> pd.Series:
    eq = (1 + daily_rets).cumprod()
    yearly = eq.resample("YE").last().pct_change()
    yearly.iloc[0] = eq.resample("YE").last().iloc[0] - 1
    yearly.index = yearly.index.year
    return yearly


def run_strategy(data: pd.DataFrame, start="2016-10-01", end="2026-05-13"):
    """Backtest SharpeBoostedEnsembleStrategy on a price panel.

    A fresh strategy instance produces weights from ``data``; those weights
    are multiplied element-wise by the per-column percentage changes of
    ``data`` (missing changes count as 0.0) and summed across columns to give
    one portfolio return per row.

    NOTE(review): a row's weights are applied to that same row's return.
    Whether this is free of look-ahead depends on whether ``generate_signals``
    already lags its output - confirm against the strategy implementation.

    Args:
        data: Price panel passed to the strategy; returns are derived from it.
        start: First label of the returned window (label-based slice).
        end: Last label of the returned window (label-based slice).

    Returns:
        The portfolio return series restricted to ``start``..``end``.
    """
    strategy = SharpeBoostedEnsembleStrategy()
    signal_weights = strategy.generate_signals(data)
    asset_returns = data.pct_change().fillna(0.0)
    portfolio_returns = (signal_weights * asset_returns).sum(axis=1)
    return portfolio_returns.loc[start:end]


_RULE = "=" * 90
# Dates at which main() reports how many PIT-masked columns hold a price.
_AVAILABILITY_DATES = ("2016-12-30", "2018-12-31", "2020-12-31", "2022-12-30", "2024-12-31")
# Hand-picked tickers highlighted in the "notable additions" listing.
_NOTABLE_ADDITIONS = frozenset({"TSLA", "MRNA", "CVNA", "PLTR", "APP", "SMCI", "AXON", "SATS"})


def _print_banner(title, leading_blank=True):
    """Print *title* framed by two 90-character rules."""
    print(f"\n{_RULE}" if leading_blank else _RULE)
    print(title)
    print(_RULE)


def _print_availability(prices, dates):
    """Print how many columns of *prices* are non-NaN on (or nearest to) each date."""
    # Build the set of known days once instead of once per requested date.
    known_days = set(prices.index.strftime("%Y-%m-%d"))
    for day in dates:
        if day in known_days:
            row, label = prices.loc[day], day
        else:
            # Requested day is not in the index: fall back to the closest row.
            pos = prices.index.get_indexer([pd.Timestamp(day)], method="nearest")[0]
            nearest = prices.index[pos]
            row, label = prices.loc[nearest], nearest.strftime("%Y-%m-%d")
        print(f"    {label}: {row.notna().sum()} stocks available")


def _membership_changes(intervals, first_year="2016", last_year="2026"):
    """Return (removed, added) lists of (ticker, date), each sorted by date.

    *intervals* maps ticker -> iterable of (start, end) pairs. Dates are
    assumed to be ISO ``YYYY-...`` strings (the report formats them with the
    ``s`` format code); a falsy start or end is ignored. Only the four-digit
    year prefix is compared: comparing the whole string against "2026" would
    reject every date inside 2026, since "2026-01-15" sorts after "2026".
    """
    removed, added = [], []
    for ticker, spans in intervals.items():
        for start_d, end_d in spans:
            if end_d and first_year <= end_d[:4] <= last_year:
                removed.append((ticker, end_d))
            if start_d and first_year <= start_d[:4] <= last_year:
                added.append((ticker, start_d))
    removed.sort(key=lambda item: item[1])
    added.sort(key=lambda item: item[1])
    return removed, added


def _pre_removal_returns(prices, removed, lookback=63):
    """Return (ticker, removal_date, trailing_return) for removals with enough history.

    The trailing return compares the last available price on or before the
    removal date with the price *lookback* rows earlier. Tickers without a
    price column, or with at most *lookback* prices, are left out.
    """
    rows = []
    for ticker, remove_date in removed:
        if ticker not in prices.columns:
            continue
        try:
            history = prices.loc[:pd.Timestamp(remove_date), ticker].dropna()
        except (ValueError, TypeError, KeyError) as exc:
            # Best effort: an unparseable date or unsliceable index skips this
            # ticker, but say so instead of hiding the problem.
            print(f"  (skipped {ticker}: cannot slice prices up to {remove_date!r}: {exc})")
            continue
        if len(history) > lookback:
            trailing = history.iloc[-1] / history.iloc[-lookback] - 1
            rows.append((ticker, remove_date, trailing))
    return rows


def _count_membership_violations(weights, mask, threshold=0.001, max_reported=5):
    """Count (date, ticker) pairs weighted above *threshold* while *mask* is False.

    The first *max_reported* violations are printed. Tickers absent from
    *mask* are not checked.

    NOTE(review): only positive weights are inspected; if the strategy can
    hold short positions they are not audited here - confirm it is long-only.
    """
    violations = 0
    for date, row in weights.iterrows():
        for ticker in row[row > threshold].index:
            if ticker in mask.columns and not mask.loc[date, ticker]:
                violations += 1
                if violations <= max_reported:
                    print(f"  VIOLATION: {ticker} selected on {date.strftime('%Y-%m-%d')} but NOT a member!")
    return violations


def main():
    """Run the survivorship-bias comparison and print a plain-text report.

    Steps, in order:
      1. Load the raw PIT price panel and derive its membership-masked version.
      2. Print how many masked columns have prices on a few reference dates.
      3. Run the strategy on the masked panel and on the unmasked panel, and
         compute metrics for both plus the raw "SPY" column.
      4. Print the metric table and the year-by-year returns.
      5. List index removals/additions from the membership history.
      6. Audit the masked backtest for weights on non-members.
      7. Block-bootstrap the masked-panel returns.

    Raises:
        KeyError: if the raw price panel has no "SPY" column.
    """
    _print_banner("SURVIVORSHIP BIAS TEST: PIT Membership vs Current Constituents", leading_blank=False)

    # --- Load PIT prices (includes delisted stocks) ---
    print("\n--- Loading PIT price data ---")
    pit_prices_raw = load_pit_prices()
    print(f"  Raw PIT prices: {pit_prices_raw.shape}")

    # --- Apply PIT membership mask ---
    print("\n--- Applying PIT membership mask ---")
    intervals = uh.load_sp500_history()
    pit_prices = pit_universe(pit_prices_raw)
    print(f"  PIT-masked prices: {pit_prices.shape}")

    # Show how many stocks are available at various dates
    _print_availability(pit_prices, _AVAILABILITY_DATES)

    # --- Create biased version: every column of the raw panel, no mask ---
    # NOTE(review): this is "no membership filter" rather than literally
    # "today's constituents"; it also leaves non-stock columns such as SPY
    # in the tradeable universe - confirm that is intended.
    biased_prices = pit_prices_raw.copy()
    print(f"\n  Biased (no mask) prices: {biased_prices.shape}")

    # --- Run strategy on both ---
    # Evaluation starts 2017-06-01. Per the original author's note the PIT
    # data begins 2016-04-19 and the strategy needs roughly 252 trading days
    # of warm-up, so earlier dates are excluded from the comparison.
    start = "2017-06-01"
    end = "2026-05-13"

    print(f"\n--- Running strategy ({start} to {end}) ---")
    print("  Running on PIT-masked data...")
    pit_rets = run_strategy(pit_prices, start=start, end=end)
    pit_m = compute_metrics(pit_rets)

    print("  Running on biased data (no mask)...")
    biased_rets = run_strategy(biased_prices, start=start, end=end)
    biased_m = compute_metrics(biased_rets)

    # --- Also compare with SPY ---
    spy_rets = pit_prices_raw["SPY"].pct_change().fillna(0.0).loc[start:end]
    spy_m = compute_metrics(spy_rets)

    # --- Results ---
    _print_banner("RESULTS COMPARISON")
    print(f"{'Metric':<12s} {'PIT (correct)':>16s} {'Biased (no mask)':>18s} {'SPY':>12s}")
    print("-" * 60)
    for metric, fmt in [("cagr", "{:.1f}%"), ("vol", "{:.1f}%"), ("sharpe", "{:.2f}"),
                        ("max_dd", "{:.1f}%"), ("calmar", "{:.2f}")]:
        # Percent-formatted metrics are stored as fractions.
        scale = 100 if "%" in fmt else 1
        pit_cell, biased_cell, spy_cell = (
            fmt.format(metrics[metric] * scale) for metrics in (pit_m, biased_m, spy_m)
        )
        print(f"  {metric:<12s} {pit_cell:>16s} {biased_cell:>18s} {spy_cell:>12s}")

    # --- Yearly comparison ---
    _print_banner("YEARLY RETURNS")
    pit_yr = yearly_returns(pit_rets)
    biased_yr = yearly_returns(biased_rets)
    spy_yr = yearly_returns(spy_rets)

    print(f"  {'Year':>4s}  {'PIT':>10s}  {'Biased':>10s}  {'Delta':>10s}  {'SPY':>10s}")
    print(f"  {'-'*50}")
    nan = float("nan")
    for year in sorted(set(pit_yr.index) | set(biased_yr.index)):
        p = pit_yr.get(year, nan)
        b = biased_yr.get(year, nan)
        s = spy_yr.get(year, nan)
        delta = p - b  # NaN on either side propagates to the delta
        print(f"  {year:>4d}  {p*100:>+9.1f}%  {b*100:>+9.1f}%  {delta*100:>+9.1f}pp  {s*100:>+9.1f}%")

    # --- Analyze which stocks are affected ---
    _print_banner("SURVIVORSHIP BIAS ANALYSIS")

    # Membership changes that fall inside the backtest period (2016-2026)
    removed_during, added_during = _membership_changes(intervals)

    print(f"\n  Stocks REMOVED from S&P 500 during 2016-2026: {len(removed_during)}")
    print(f"  Stocks ADDED to S&P 500 during 2016-2026: {len(added_during)}")

    print("\n  Most impactful removals (stocks that biased backtest would wrongly exclude):")
    # Rank removed stocks that have price data by their return into removal
    removed_with_prices = _pre_removal_returns(pit_prices_raw, removed_during)
    removed_with_prices.sort(key=lambda x: x[2] if not np.isnan(x[2]) else 0)
    print(f"  {'Ticker':<8s} {'Removed':>12s} {'3m ret before':>14s} {'Impact'}")
    for ticker, rd, ret in removed_with_prices[:15]:
        impact = "Would have been selected (recovery signal)" if ret < -0.20 else "Neutral"
        print(f"  {ticker:<8s} {rd:>12s} {ret*100:>+13.1f}% {impact}")

    print("\n  Notable ADDITIONS (stocks biased backtest wrongly includes early):")
    for ticker, add_date in added_during:
        if ticker in _NOTABLE_ADDITIONS:
            print(f"  {ticker:<8s} added {add_date} — biased backtest selects it BEFORE this date!")

    # --- Check: did we select any non-member stocks in PIT backtest? ---
    _print_banner("PIT AUDIT: Verify no look-ahead in PIT backtest")

    # run_strategy returns only the portfolio returns, so the weights have to
    # be generated again here to inspect individual positions.
    strat = SharpeBoostedEnsembleStrategy()
    pit_weights = strat.generate_signals(pit_prices)

    # For each date, check that all positively weighted stocks are members
    mask = uh.membership_mask(pit_prices.index, intervals, list(pit_prices.columns))
    violations = _count_membership_violations(pit_weights, mask)

    if violations == 0:
        print("  NO VIOLATIONS: All selected stocks were S&P 500 members on their selection date.")
    else:
        print(f"  Total violations: {violations}")

    # --- Bootstrap on PIT returns ---
    _print_banner("BOOTSTRAP: PIT-corrected returns")
    from research.trend_rider_p0 import block_bootstrap
    boot = block_bootstrap(pit_rets, n_boot=5000, block_len=42)
    print(f"  Sharpe: median={boot['sharpe'].median():.2f}  "
          f"5th={boot['sharpe'].quantile(0.05):.2f}  "
          f"95th={boot['sharpe'].quantile(0.95):.2f}")
    print(f"  CAGR:   median={boot['cagr'].median()*100:.1f}%  "
          f"5th={boot['cagr'].quantile(0.05)*100:.1f}%  "
          f"95th={boot['cagr'].quantile(0.95)*100:.1f}%")
    print(f"  MaxDD:  median={boot['max_drawdown'].median()*100:.1f}%  "
          f"5th={boot['max_drawdown'].quantile(0.05)*100:.1f}%  "
          f"95th={boot['max_drawdown'].quantile(0.95)*100:.1f}%")
    print(f"  P(Sharpe > 1.5): {(boot['sharpe'] > 1.5).mean()*100:.1f}%")
    print(f"  P(Sharpe > 1.0): {(boot['sharpe'] > 1.0).mean()*100:.1f}%")


# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()