Files
quant/research/pit_comparison.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

235 lines
9.8 KiB
Python

"""
PIT-compliant backtest: mask prices to historical S&P 500 membership.
Compares:
1. BIASED: current S&P 500 constituents applied back to 2016 (what we had before)
2. PIT: historical membership mask — each date only sees stocks that were
actually S&P 500 members on that date
This isolates the survivorship bias in our previous results.
"""
from __future__ import annotations
import os, sys
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from strategies.ensemble_alpha import SharpeBoostedEnsembleStrategy
import universe_history as uh
from research.pit_backtest import load_pit_prices, pit_universe
def compute_metrics(daily_rets: pd.Series) -> dict:
eq = (1 + daily_rets).cumprod()
n_years = len(daily_rets) / 252.0
cagr = eq.iloc[-1] ** (1.0 / n_years) - 1.0
vol = daily_rets.std() * np.sqrt(252)
sharpe = daily_rets.mean() / daily_rets.std() * np.sqrt(252) if daily_rets.std() > 0 else 0
running_max = eq.cummax()
dd = eq / running_max - 1
max_dd = dd.min()
calmar = cagr / abs(max_dd) if max_dd != 0 else 0
return {"cagr": cagr, "vol": vol, "sharpe": sharpe, "max_dd": max_dd, "calmar": calmar}
def yearly_returns(daily_rets: pd.Series) -> pd.Series:
eq = (1 + daily_rets).cumprod()
yearly = eq.resample("YE").last().pct_change()
yearly.iloc[0] = eq.resample("YE").last().iloc[0] - 1
yearly.index = yearly.index.year
return yearly
def run_strategy(data: pd.DataFrame, start="2016-10-01", end="2026-05-13"):
"""Run SharpeBoostedEnsembleStrategy on given price data."""
strat = SharpeBoostedEnsembleStrategy()
weights = strat.generate_signals(data)
daily_rets = (weights * data.pct_change().fillna(0.0)).sum(axis=1)
return daily_rets.loc[start:end]
def main():
print("=" * 90)
print("SURVIVORSHIP BIAS TEST: PIT Membership vs Current Constituents")
print("=" * 90)
# --- Load PIT prices (includes delisted stocks) ---
print("\n--- Loading PIT price data ---")
pit_prices_raw = load_pit_prices()
print(f" Raw PIT prices: {pit_prices_raw.shape}")
# --- Apply PIT membership mask ---
print("\n--- Applying PIT membership mask ---")
intervals = uh.load_sp500_history()
pit_prices = pit_universe(pit_prices_raw)
print(f" PIT-masked prices: {pit_prices.shape}")
# Show how many stocks are available at various dates
for d in ["2016-12-30", "2018-12-31", "2020-12-31", "2022-12-30", "2024-12-31"]:
if d in pit_prices.index.strftime("%Y-%m-%d").tolist():
n_avail = pit_prices.loc[d].notna().sum()
print(f" {d}: {n_avail} stocks available")
else:
# Find nearest date
idx = pit_prices.index.get_indexer([pd.Timestamp(d)], method="nearest")
actual = pit_prices.index[idx[0]]
n_avail = pit_prices.loc[actual].notna().sum()
print(f" {actual.strftime('%Y-%m-%d')}: {n_avail} stocks available")
# --- Create biased version: use all stocks in us_pit (no mask) ---
# This simulates "using today's S&P 500 back in 2016"
biased_prices = pit_prices_raw.copy()
print(f"\n Biased (no mask) prices: {biased_prices.shape}")
# --- Run strategy on both ---
# Use start=2016-10-01 because PIT data starts 2016-04-19 and we need
# 252 days of warmup
start = "2017-06-01" # ~252 trading days after 2016-04-19
end = "2026-05-13"
print(f"\n--- Running strategy ({start} to {end}) ---")
print(" Running on PIT-masked data...")
pit_rets = run_strategy(pit_prices, start=start, end=end)
pit_m = compute_metrics(pit_rets)
print(" Running on biased data (no mask)...")
biased_rets = run_strategy(biased_prices, start=start, end=end)
biased_m = compute_metrics(biased_rets)
# --- Also compare with SPY ---
spy_rets = pit_prices_raw["SPY"].pct_change().fillna(0.0).loc[start:end]
spy_m = compute_metrics(spy_rets)
# --- Results ---
print(f"\n{'=' * 90}")
print("RESULTS COMPARISON")
print(f"{'=' * 90}")
print(f"{'Metric':<12s} {'PIT (correct)':>16s} {'Biased (no mask)':>18s} {'SPY':>12s}")
print("-" * 60)
for metric, fmt in [("cagr", "{:.1f}%"), ("vol", "{:.1f}%"), ("sharpe", "{:.2f}"),
("max_dd", "{:.1f}%"), ("calmar", "{:.2f}")]:
scale = 100 if "%" in fmt else 1
pit_val = pit_m[metric] * scale
biased_val = biased_m[metric] * scale
spy_val = spy_m[metric] * scale
print(f" {metric:<12s} {fmt.format(pit_val):>16s} {fmt.format(biased_val):>18s} {fmt.format(spy_val):>12s}")
# --- Yearly comparison ---
print(f"\n{'=' * 90}")
print("YEARLY RETURNS")
print(f"{'=' * 90}")
pit_yr = yearly_returns(pit_rets)
biased_yr = yearly_returns(biased_rets)
spy_yr = yearly_returns(spy_rets)
print(f" {'Year':>4s} {'PIT':>10s} {'Biased':>10s} {'Delta':>10s} {'SPY':>10s}")
print(f" {'-'*50}")
for year in sorted(set(pit_yr.index) | set(biased_yr.index)):
p = pit_yr.get(year, float("nan"))
b = biased_yr.get(year, float("nan"))
s = spy_yr.get(year, float("nan"))
delta = p - b if not (np.isnan(p) or np.isnan(b)) else float("nan")
print(f" {year:>4d} {p*100:>+9.1f}% {b*100:>+9.1f}% {delta*100:>+9.1f}pp {s*100:>+9.1f}%")
# --- Analyze which stocks are affected ---
print(f"\n{'=' * 90}")
print("SURVIVORSHIP BIAS ANALYSIS")
print(f"{'=' * 90}")
# Find stocks that are NOT in current S&P 500 but WERE members historically
from universe import get_sp500
current_sp500 = set(get_sp500())
# Stocks removed from S&P 500 during our backtest period (2016-2026)
removed_during = []
added_during = []
for ticker, ivs in intervals.items():
for start_d, end_d in ivs:
if end_d and "2016" <= end_d <= "2026":
removed_during.append((ticker, end_d))
if start_d and "2016" <= start_d <= "2026":
added_during.append((ticker, start_d))
removed_during.sort(key=lambda x: x[1])
added_during.sort(key=lambda x: x[1])
print(f"\n Stocks REMOVED from S&P 500 during 2016-2026: {len(removed_during)}")
print(f" Stocks ADDED to S&P 500 during 2016-2026: {len(added_during)}")
print(f"\n Most impactful removals (stocks that biased backtest would wrongly exclude):")
# Check which removed stocks had price data and what happened to them
removed_with_prices = []
for ticker, remove_date in removed_during:
if ticker in pit_prices_raw.columns:
# What was their return from when they were removed?
try:
remove_ts = pd.Timestamp(remove_date)
pre = pit_prices_raw.loc[:remove_ts, ticker].dropna()
if len(pre) > 63:
# Get 3-month return before removal
ret_3m = pre.iloc[-1] / pre.iloc[-63] - 1 if len(pre) > 63 else np.nan
removed_with_prices.append((ticker, remove_date, ret_3m))
except Exception:
pass
removed_with_prices.sort(key=lambda x: x[2] if not np.isnan(x[2]) else 0)
print(f" {'Ticker':<8s} {'Removed':>12s} {'3m ret before':>14s} {'Impact'}")
for ticker, rd, ret in removed_with_prices[:15]:
impact = "Would have been selected (recovery signal)" if ret < -0.20 else "Neutral"
print(f" {ticker:<8s} {rd:>12s} {ret*100:>+13.1f}% {impact}")
print(f"\n Notable ADDITIONS (stocks biased backtest wrongly includes early):")
# Key stocks that were added during our period
notable_adds = [(t, d) for t, d in added_during
if t in ["TSLA", "MRNA", "CVNA", "PLTR", "APP", "SMCI", "AXON", "SATS"]]
for ticker, add_date in notable_adds:
print(f" {ticker:<8s} added {add_date} — biased backtest selects it BEFORE this date!")
# --- Check: did we select any non-member stocks in PIT backtest? ---
print(f"\n{'=' * 90}")
print("PIT AUDIT: Verify no look-ahead in PIT backtest")
print(f"{'=' * 90}")
strat = SharpeBoostedEnsembleStrategy()
pit_weights = strat.generate_signals(pit_prices)
# For each date, check that all non-zero weight stocks are S&P 500 members
mask = uh.membership_mask(pit_prices.index, intervals, list(pit_prices.columns))
violations = 0
for date in pit_weights.index:
active = pit_weights.loc[date]
active_tickers = active[active > 0.001].index.tolist()
for t in active_tickers:
if t in mask.columns and not mask.loc[date, t]:
violations += 1
if violations <= 5:
print(f" VIOLATION: {t} selected on {date.strftime('%Y-%m-%d')} but NOT a member!")
if violations == 0:
print(" NO VIOLATIONS: All selected stocks were S&P 500 members on their selection date.")
else:
print(f" Total violations: {violations}")
# --- Bootstrap on PIT returns ---
print(f"\n{'=' * 90}")
print("BOOTSTRAP: PIT-corrected returns")
print(f"{'=' * 90}")
from research.trend_rider_p0 import block_bootstrap
boot = block_bootstrap(pit_rets, n_boot=5000, block_len=42)
print(f" Sharpe: median={boot['sharpe'].median():.2f} "
f"5th={boot['sharpe'].quantile(0.05):.2f} "
f"95th={boot['sharpe'].quantile(0.95):.2f}")
print(f" CAGR: median={boot['cagr'].median()*100:.1f}% "
f"5th={boot['cagr'].quantile(0.05)*100:.1f}% "
f"95th={boot['cagr'].quantile(0.95)*100:.1f}%")
print(f" MaxDD: median={boot['max_drawdown'].median()*100:.1f}% "
f"5th={boot['max_drawdown'].quantile(0.05)*100:.1f}% "
f"95th={boot['max_drawdown'].quantile(0.95)*100:.1f}%")
print(f" P(Sharpe > 1.5): {(boot['sharpe'] > 1.5).mean()*100:.1f}%")
print(f" P(Sharpe > 1.0): {(boot['sharpe'] > 1.0).mean()*100:.1f}%")
if __name__ == "__main__":
main()