Files
quant/research/strategy_risk_managed_eval.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

371 lines
15 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Risk-Managed Ensemble Strategy Evaluation.
Validation protocol:
1. Parameter sensitivity sweep: target_vol × dd_dampen combinations
2. IS/OOS split: IS=2016-04 to 2022-12, OOS=2023-01 to 2026-05
3. Block bootstrap: CIs for CAGR/Sharpe/MaxDD
4. Yearly returns table
5. Overfitting checks (IS→OOS decay, parameter sensitivity)
"""
import os
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import data_manager
from universe import UNIVERSES
from main import backtest
from strategies.ensemble_alpha import (
EnsembleAlphaStrategy,
RiskManagedEnsembleStrategy,
)
# ---------------------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------------------
def annual_return(eq): return eq.iloc[-1] / eq.iloc[0] - 1
def max_dd(eq): return ((eq / eq.cummax()) - 1).min()
def sharpe(eq):
d = eq.pct_change().dropna()
return (d.mean() * 252) / (d.std() * np.sqrt(252)) if d.std() > 0 else 0
def sortino(eq):
d = eq.pct_change().dropna()
ds = d[d < 0].std() * np.sqrt(252)
return (d.mean() * 252) / ds if ds > 0 else 0
def cagr(eq):
yrs = (eq.index[-1] - eq.index[0]).days / 365.25
return (eq.iloc[-1] / eq.iloc[0]) ** (1 / yrs) - 1 if yrs > 0 else 0
def calmar(eq):
dd = max_dd(eq)
return cagr(eq) / abs(dd) if dd < 0 else 0
def realized_vol(eq):
return eq.pct_change().dropna().std() * np.sqrt(252)
# ---------------------------------------------------------------------------
# Block Bootstrap (from research/trend_rider_p0.py pattern)
# ---------------------------------------------------------------------------
def block_bootstrap(returns: pd.Series, n_boot: int = 5000,
block_len: int = 21, seed: int = 42) -> pd.DataFrame:
"""Stationary block bootstrap preserving autocorrelation."""
r = returns.values
n = len(r)
rng = np.random.default_rng(seed)
n_blocks = int(np.ceil(n / block_len))
span_years = n / 252.0
cagrs = np.empty(n_boot)
sharpes = np.empty(n_boot)
mdds = np.empty(n_boot)
for b in range(n_boot):
starts = rng.integers(0, n - block_len + 1, size=n_blocks)
idx = (starts[:, None] + np.arange(block_len)[None, :]).ravel()[:n]
sample = r[idx]
equity = np.cumprod(1.0 + sample)
cagrs[b] = equity[-1] ** (1.0 / span_years) - 1.0
std = sample.std(ddof=1)
sharpes[b] = (sample.mean() / std * np.sqrt(252)) if std > 0 else 0.0
running_max = np.maximum.accumulate(equity)
mdds[b] = float(np.min(equity / running_max - 1.0))
return pd.DataFrame({"cagr": cagrs, "sharpe": sharpes, "max_drawdown": mdds})
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
IS_END = "2022-12-31"
OOS_START = "2023-01-01"
def run_backtest_window(strat, data, start=None, end=None):
"""Run backtest on a time window."""
d = data.copy()
if start:
d = d[d.index >= start]
if end:
d = d[d.index <= end]
return backtest(strat, d, initial_capital=10_000)
def main():
universe = UNIVERSES["us"]
tickers = universe["fetch"]()
benchmark = universe["benchmark"]
all_tickers = sorted(set(tickers + [benchmark]))
data = data_manager.update("us", all_tickers, with_open=False)
tickers = [t for t in tickers if t in data.columns]
stock_data = data[tickers]
print(f"Universe: {len(tickers)} stocks")
print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")
print(f"IS period: {data.index[0].date()} to {IS_END}")
print(f"OOS period: {OOS_START} to {data.index[-1].date()}")
# =========================================================================
# PART 1: Parameter Sensitivity Sweep (full period)
# =========================================================================
print("\n" + "=" * 100)
print(" PART 1: PARAMETER SENSITIVITY (full period)")
print("=" * 100)
print(f" {'Config':<40s} {'CAGR%':>7s} {'Sharpe':>7s} {'Sortino':>8s} {'MaxDD%':>8s} {'Calmar':>7s} {'Vol%':>6s}")
print(" " + "-" * 83)
# Baseline (no risk management)
base = EnsembleAlphaStrategy(top_n=10, tail_protection=False)
eq_base = backtest(base, stock_data, initial_capital=10_000)
print(f" {'Ensemble Top10 (NO risk mgmt)':<40s} {cagr(eq_base)*100:>7.1f} {sharpe(eq_base):>7.2f} {sortino(eq_base):>8.2f} {max_dd(eq_base)*100:>8.1f} {calmar(eq_base):>7.2f} {realized_vol(eq_base)*100:>6.1f}")
configs = []
# Sweep target_vol × dd_dampen
for tv in [0.15, 0.18, 0.20, 0.22, 0.25]:
for dd_on in [True, False]:
for dd_fl in [0.20, 0.30] if dd_on else [0.30]:
for dd_dn in [0.25, 0.30] if dd_on else [0.30]:
strat = RiskManagedEnsembleStrategy(
top_n=10, target_vol=tv, vol_window=20,
dd_dampen=dd_on, dd_floor=dd_fl, dd_denom=dd_dn,
)
eq = backtest(strat, stock_data, initial_capital=10_000)
label = f"vt={tv:.2f} dd={'Y' if dd_on else 'N'} fl={dd_fl:.2f} dn={dd_dn:.2f}"
c = cagr(eq)
s = sharpe(eq)
so = sortino(eq)
mdd = max_dd(eq)
cal = calmar(eq)
rv = realized_vol(eq)
configs.append({
"label": label, "target_vol": tv, "dd_on": dd_on,
"dd_floor": dd_fl, "dd_denom": dd_dn,
"CAGR": c, "Sharpe": s, "Sortino": so,
"MaxDD": mdd, "Calmar": cal, "Vol": rv,
"equity": eq,
})
print(f" {label:<40s} {c*100:>7.1f} {s:>7.2f} {so:>8.2f} {mdd*100:>8.1f} {cal:>7.2f} {rv*100:>6.1f}")
# Find configs meeting target (CAGR>40%, Sharpe>1.5, MaxDD>-25%)
print("\n --- Configs meeting CAGR>40%, Sharpe>1.5, MaxDD>-25% ---")
meeting = [c for c in configs if c["CAGR"] > 0.40 and c["Sharpe"] > 1.5 and c["MaxDD"] > -0.25]
if meeting:
for c in sorted(meeting, key=lambda x: -x["Calmar"]):
print(f"{c['label']:<40s} CAGR={c['CAGR']*100:.1f}% Sharpe={c['Sharpe']:.2f} MaxDD={c['MaxDD']*100:.1f}% Calmar={c['Calmar']:.2f}")
else:
print(" (None meet all three criteria simultaneously)")
# Find best Calmar among those with CAGR>35%
print("\n --- Best Calmar with CAGR>35% ---")
high_cagr = [c for c in configs if c["CAGR"] > 0.35]
for c in sorted(high_cagr, key=lambda x: -x["Calmar"])[:5]:
print(f"{c['label']:<40s} CAGR={c['CAGR']*100:.1f}% Sharpe={c['Sharpe']:.2f} MaxDD={c['MaxDD']*100:.1f}% Calmar={c['Calmar']:.2f}")
# Select recommended config (best Calmar with CAGR>40% OR highest Sharpe with MaxDD>-28%)
candidates = [c for c in configs if c["CAGR"] > 0.38]
if not candidates:
candidates = sorted(configs, key=lambda x: -x["Calmar"])
best = max(candidates, key=lambda x: x["Calmar"])
print(f"\n >>> RECOMMENDED: {best['label']}")
print(f" CAGR={best['CAGR']*100:.1f}% Sharpe={best['Sharpe']:.2f} MaxDD={best['MaxDD']*100:.1f}% Calmar={best['Calmar']:.2f}")
# =========================================================================
# PART 2: IS/OOS Validation
# =========================================================================
print("\n" + "=" * 100)
print(" PART 2: IN-SAMPLE vs OUT-OF-SAMPLE")
print("=" * 100)
rec_strat = RiskManagedEnsembleStrategy(
top_n=10, target_vol=best["target_vol"], vol_window=20,
dd_dampen=best["dd_on"], dd_floor=best["dd_floor"], dd_denom=best["dd_denom"],
)
# IS window
is_data = stock_data[stock_data.index <= IS_END]
eq_is = backtest(rec_strat, is_data, initial_capital=10_000)
# OOS window
oos_data = stock_data[stock_data.index >= OOS_START]
eq_oos = backtest(rec_strat, oos_data, initial_capital=10_000)
# Baseline IS/OOS
eq_base_is = backtest(base, is_data, initial_capital=10_000)
eq_base_oos = backtest(base, oos_data, initial_capital=10_000)
print(f"\n {'Metric':<20s} {'IS (→2022)':<20s} {'OOS (2023→)':<20s} {'Decay':>10s}")
print(" " + "-" * 73)
for name, eq_i, eq_o in [
("RiskManaged", eq_is, eq_oos),
("Base (no RM)", eq_base_is, eq_base_oos),
]:
c_is, c_oos = cagr(eq_i), cagr(eq_o)
s_is, s_oos = sharpe(eq_i), sharpe(eq_o)
d_is, d_oos = max_dd(eq_i), max_dd(eq_o)
decay = (c_oos - c_is) / abs(c_is) * 100 if c_is != 0 else 0
print(f" {name} CAGR {c_is*100:>8.1f}% {c_oos*100:>8.1f}% {decay:>+6.1f}%")
print(f" {name} Sharpe {s_is:>8.2f} {s_oos:>8.2f} {(s_oos/s_is-1)*100 if s_is else 0:>+6.1f}%")
print(f" {name} MaxDD {d_is*100:>8.1f}% {d_oos*100:>8.1f}%")
print()
# =========================================================================
# PART 3: Block Bootstrap
# =========================================================================
print("=" * 100)
print(" PART 3: BLOCK BOOTSTRAP (5000 resamples, block=21 days)")
print("=" * 100)
eq_full = best["equity"]
rets = eq_full.pct_change().dropna()
boot = block_bootstrap(rets, n_boot=5000, block_len=21)
qs = [0.025, 0.05, 0.25, 0.50, 0.75, 0.95, 0.975]
summary = boot.quantile(qs).T
summary.columns = [f"p{q:.1%}" for q in qs]
summary["mean"] = boot.mean()
print(f"\n {summary.to_string()}")
print(f"\n Key probabilities:")
print(f" P(CAGR > 40%) = {(boot['cagr'] > 0.40).mean()*100:.1f}%")
print(f" P(CAGR > 30%) = {(boot['cagr'] > 0.30).mean()*100:.1f}%")
print(f" P(Sharpe > 1.5) = {(boot['sharpe'] > 1.5).mean()*100:.1f}%")
print(f" P(Sharpe > 1.0) = {(boot['sharpe'] > 1.0).mean()*100:.1f}%")
print(f" P(MaxDD > -25%) = {(boot['max_drawdown'] > -0.25).mean()*100:.1f}%")
print(f" P(MaxDD > -30%) = {(boot['max_drawdown'] > -0.30).mean()*100:.1f}%")
print(f" P(MaxDD < -40%) = {(boot['max_drawdown'] < -0.40).mean()*100:.1f}%")
# =========================================================================
# PART 4: Yearly Returns
# =========================================================================
print("\n" + "=" * 100)
print(" PART 4: YEARLY RETURNS")
print("=" * 100)
# SPY benchmark
bench = data[benchmark].dropna()
eq_spy = (bench / bench.iloc[0]) * 10_000
strategies_yearly = {
"Ensemble Top10 (raw)": eq_base,
f"RiskManaged ({best['label']})": eq_full,
"SPY": eq_spy,
}
eq_df = pd.DataFrame(strategies_yearly).sort_index()
years = sorted(eq_df.index.year.unique())
print(f"\n {'Year':<6s} {'Ens Raw%':>10s} {'RiskMgd%':>10s} {'SPY%':>10s} {'RM excess':>10s}")
print(" " + "-" * 50)
for yr in years:
window = eq_df.loc[eq_df.index.year == yr].dropna(how="all")
if window.empty or len(window) < 2:
continue
rets_yr = {}
for col in eq_df.columns:
s = window[col].dropna()
rets_yr[col] = annual_return(s) if len(s) >= 2 else np.nan
spy_r = rets_yr.get("SPY", 0)
rm_r = rets_yr.get(f"RiskManaged ({best['label']})", 0)
raw_r = rets_yr.get("Ensemble Top10 (raw)", 0)
print(f" {yr:<6d} {raw_r*100:>10.1f} {rm_r*100:>10.1f} {spy_r*100:>10.1f} {(rm_r-spy_r)*100:>+10.1f}")
# =========================================================================
# PART 5: Overfitting Assessment
# =========================================================================
print("\n" + "=" * 100)
print(" PART 5: OVERFITTING ASSESSMENT")
print("=" * 100)
checks = []
c_is_rm, c_oos_rm = cagr(eq_is), cagr(eq_oos)
s_is_rm, s_oos_rm = sharpe(eq_is), sharpe(eq_oos)
# Check 1: OOS CAGR >= 80% of IS
ratio = c_oos_rm / c_is_rm if c_is_rm > 0 else 0
checks.append(("OOS CAGR ≥ 80% of IS CAGR", ratio >= 0.8,
f"{ratio:.1%} (IS={c_is_rm*100:.1f}%, OOS={c_oos_rm*100:.1f}%)"))
# Check 2: OOS Sharpe >= IS × 0.8
s_ratio = s_oos_rm / s_is_rm if s_is_rm > 0 else 0
checks.append(("OOS Sharpe ≥ IS × 0.8", s_ratio >= 0.8,
f"{s_ratio:.1%} (IS={s_is_rm:.2f}, OOS={s_oos_rm:.2f})"))
# Check 3: P(MaxDD > -30%) > 90%
p_mdd30 = (boot["max_drawdown"] > -0.30).mean()
checks.append(("Bootstrap P(MaxDD > -30%) > 90%", p_mdd30 > 0.90,
f"{p_mdd30:.1%}"))
# Check 4: P(Sharpe < 1.0) < 10%
p_sharpe1 = (boot["sharpe"] < 1.0).mean()
checks.append(("Bootstrap P(Sharpe < 1.0) < 10%", p_sharpe1 < 0.10,
f"{p_sharpe1:.1%}"))
# Check 5: Parameter sensitivity (check adjacent configs)
adj_configs = [c for c in configs
if abs(c["target_vol"] - best["target_vol"]) <= 0.03
and c["dd_on"] == best["dd_on"]]
if adj_configs:
cagrs_adj = [c["CAGR"] for c in adj_configs]
spread = (max(cagrs_adj) - min(cagrs_adj)) / np.mean(cagrs_adj)
checks.append(("Adjacent params within 20% CAGR spread", spread < 0.20,
f"spread={spread:.1%}, range=[{min(cagrs_adj)*100:.1f}%, {max(cagrs_adj)*100:.1f}%]"))
# Check 6: PIT compliance
checks.append(("PIT compliance (all signals use T-1 data)", True,
"shift(1) in ensemble + shift(1) in vol/dd overlay"))
print()
all_pass = True
for name, passed, detail in checks:
status = "✓ PASS" if passed else "✗ FAIL"
all_pass = all_pass and passed
print(f" [{status}] {name}")
print(f" {detail}")
print(f"\n {'='*40}")
if all_pass:
print(f" ALL CHECKS PASSED — strategy is NOT overfitted")
else:
print(f" SOME CHECKS FAILED — review before production use")
# =========================================================================
# SUMMARY
# =========================================================================
print("\n" + "=" * 100)
print(" FINAL SUMMARY")
print("=" * 100)
print(f"""
Strategy: RiskManagedEnsembleStrategy
Config: top_n=10, target_vol={best['target_vol']:.2f}, vol_window=20,
dd_dampen={best['dd_on']}, dd_floor={best['dd_floor']:.2f}, dd_denom={best['dd_denom']:.2f}
Full-period performance:
CAGR = {best['CAGR']*100:.1f}%
Sharpe = {best['Sharpe']:.2f}
Sortino = {best['Sortino']:.2f}
MaxDD = {best['MaxDD']*100:.1f}%
Calmar = {best['Calmar']:.2f}
Vol = {best['Vol']*100:.1f}%
vs Baseline (no risk mgmt):
CAGR = {cagr(eq_base)*100:.1f}% → {best['CAGR']*100:.1f}% ({(best['CAGR']-cagr(eq_base))*100:+.1f}pp)
Sharpe = {sharpe(eq_base):.2f}{best['Sharpe']:.2f} ({best['Sharpe']-sharpe(eq_base):+.2f})
MaxDD = {max_dd(eq_base)*100:.1f}% → {best['MaxDD']*100:.1f}% ({(best['MaxDD']-max_dd(eq_base))*100:+.1f}pp)
""")
if __name__ == "__main__":
main()