"""Comprehensive strategy improvement evaluation.

Compares original strategies against improved versions, showing:
- Yearly returns (2016-2026; years without data are skipped)
- Key metrics (CAGR, Sharpe, Sortino, MaxDD, Calmar)
- Excess over SPY
- Turnover analysis (see ``turnover``; not wired into ``main``)
"""
from pathlib import Path

import numpy as np
import pandas as pd

import data_manager
from universe import UNIVERSES
from main import backtest

# Original strategies
from strategies.momentum import MomentumStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.factor_combo import FactorComboStrategy

# Improved strategies
from strategies.enhanced_recovery_momentum import EnhancedRecoveryMomentumStrategy
from strategies.improved_momentum_quality import ImprovedMomentumQualityStrategy
from strategies.composite_alpha import CompositeAlphaStrategy

# Annualisation factor used by the Sharpe/Sortino ratios and as the minimum
# history length required for a column to appear in the summary table.
TRADING_DAYS = 252
INITIAL_CAPITAL = 10_000
# Inclusive range of calendar years reported in the yearly table.
FIRST_YEAR = 2016
LAST_YEAR = 2026
BANNER_WIDTH = 80

SUMMARY_COLUMNS = [
    "Strategy",
    "CAGR %",
    "Sharpe",
    "Sortino",
    "Max DD %",
    "Calmar",
    "Avg Ann Ret %",
    "Win Rate vs SPY",
]


def annual_return(eq: pd.Series) -> float:
    """Return the simple return from the first to the last value of *eq*.

    Despite the name, no annualisation is applied: the result is the total
    return over whatever window the caller passes in. An empty series yields
    NaN instead of raising ``IndexError``.
    """
    if eq.empty:
        return float("nan")
    return eq.iloc[-1] / eq.iloc[0] - 1


def max_dd(eq: pd.Series) -> float:
    """Return the largest peak-to-trough decline of *eq* as a value <= 0."""
    return ((eq / eq.cummax()) - 1).min()


def sharpe(eq: pd.Series) -> float:
    """Return the annualised Sharpe ratio of *eq* (zero risk-free rate).

    Returns 0.0 when volatility is zero or undefined. ``std()`` is NaN with
    fewer than two returns, and ``NaN == 0`` is False, so the NaN case has to
    be checked explicitly.
    """
    daily = eq.pct_change().dropna()
    vol = daily.std()
    if pd.isna(vol) or vol == 0:
        return 0.0
    return (daily.mean() * TRADING_DAYS) / (vol * np.sqrt(TRADING_DAYS))


def sortino(eq: pd.Series) -> float:
    """Return the annualised Sortino ratio of *eq*.

    Downside risk is the standard deviation of the negative daily returns.
    Returns 0.0 when that deviation is zero or undefined (fewer than two
    negative days makes ``std()`` NaN).
    """
    daily = eq.pct_change().dropna()
    downside = daily[daily < 0].std() * np.sqrt(TRADING_DAYS)
    if pd.isna(downside) or downside == 0:
        return 0.0
    return (daily.mean() * TRADING_DAYS) / downside


def cagr(eq: pd.Series) -> float:
    """Return the compound annual growth rate of *eq*.

    The elapsed time is taken from the first and last index labels, so the
    index must support subtraction yielding an object with ``.days``.
    Returns 0.0 for series with fewer than two points or a non-positive span.
    """
    if len(eq) < 2:
        return 0.0
    yrs = (eq.index[-1] - eq.index[0]).days / 365.25
    if yrs <= 0:
        return 0.0
    return (eq.iloc[-1] / eq.iloc[0]) ** (1 / yrs) - 1


def turnover(weights: pd.DataFrame) -> float:
    """Average daily turnover.

    Turnover for a day is the sum of absolute weight changes versus the
    previous row. The first row has no predecessor (its ``diff`` is all NaN
    and would sum to 0), so it is excluded from the average rather than
    counted as a zero-turnover day. Returns 0.0 with fewer than two rows.
    """
    changes = weights.diff().iloc[1:]
    if changes.empty:
        return 0.0
    return changes.abs().sum(axis=1).mean()


def _calmar(growth: float, drawdown: float) -> float:
    """Return CAGR divided by |max drawdown|, or 0.0 when there is no drawdown."""
    return growth / abs(drawdown) if drawdown < 0 else 0.0


def _banner(title: str) -> None:
    """Print a section title framed by two rules of ``=`` characters."""
    print("\n" + "=" * BANNER_WIDTH)
    print(title)
    print("=" * BANNER_WIDTH)


def _build_strategies(data: pd.DataFrame, tickers: list, benchmark: str, top_n: int) -> dict:
    """Map display name -> (strategy instance, price frame handed to backtest).

    Strategies that take ``benchmark`` as a constructor argument receive the
    full frame (benchmark column included); all others see stock columns only.
    Names containing "(orig)" are treated as baselines by the comparison step.
    """
    stocks = data[tickers]
    return {
        # === ORIGINALS ===
        "Momentum (orig)": (
            MomentumStrategy(lookback=252, skip=21, top_n=top_n),
            stocks,
        ),
        "Recovery+Mom Top20 (orig)": (
            RecoveryMomentumStrategy(top_n=20),
            stocks,
        ),
        "Mom+Quality (orig)": (
            MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n),
            stocks,
        ),
        "Mom+InvVol (orig)": (
            AdaptiveMomentumStrategy(top_n=top_n),
            stocks,
        ),
        "Dual Momentum (orig)": (
            DualMomentumStrategy(top_n=top_n),
            stocks,
        ),
        "Trend Following (orig)": (
            TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n),
            stocks,
        ),
        "Multi-Factor (orig)": (
            MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n),
            data,
        ),
        "FactorCombo rec+deep (orig)": (
            FactorComboStrategy(
                signal_name="rec_mfilt+deep_upvol", rebal_freq=21, top_n=20
            ),
            stocks,
        ),
        # === IMPROVED ===
        "Enhanced RecMom Top20": (
            EnhancedRecoveryMomentumStrategy(
                recovery_window=63, mom_lookback=252, mom_skip=21,
                intermediate_mom=126, vol_window=60, rebal_freq=21,
                top_n=20, regime_scale=True,
            ),
            stocks,
        ),
        "Enhanced RecMom Top30": (
            EnhancedRecoveryMomentumStrategy(
                recovery_window=63, mom_lookback=252, mom_skip=21,
                intermediate_mom=126, vol_window=60, rebal_freq=21,
                top_n=30, regime_scale=True,
            ),
            stocks,
        ),
        "Improved MomQuality": (
            ImprovedMomentumQualityStrategy(
                momentum_period=252, skip=21, quality_window=252,
                recovery_window=63, vol_window=60, rebal_freq=21,
                top_n=20,
            ),
            stocks,
        ),
        "Improved MomQuality Top30": (
            ImprovedMomentumQualityStrategy(
                momentum_period=252, skip=21, quality_window=252,
                recovery_window=63, vol_window=60, rebal_freq=21,
                top_n=30,
            ),
            stocks,
        ),
        "Composite Alpha": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60, rebal_freq=10,
                top_n=20, regime_gate=True,
            ),
            data,
        ),
        "Composite Alpha Top30": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60, rebal_freq=10,
                top_n=30, regime_gate=True,
            ),
            data,
        ),
        "Composite Alpha NoRegime": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60, rebal_freq=10,
                top_n=20, regime_gate=False,
            ),
            data,
        ),
    }


def _yearly_returns(eq_df: pd.DataFrame, years) -> pd.DataFrame:
    """Return a Year-indexed frame of per-column returns for each calendar year.

    Years with no observations are omitted; a column with fewer than two
    observations inside a year gets NaN for that year.

    NOTE(review): each year's return runs from the first to the last
    observation *within* the year, so the move from the prior year's final
    close to the first close of the year is not included. Confirm this is
    the intended convention.
    """
    rows = []
    for yr in years:
        start = pd.Timestamp(f"{yr}-01-01")
        end = pd.Timestamp(f"{yr}-12-31")
        in_year = (eq_df.index >= start) & (eq_df.index <= end)
        window = eq_df.loc[in_year].dropna(how="all")
        if window.empty:
            continue
        row = {"Year": yr}
        for col in eq_df.columns:
            s = window[col].dropna()
            row[col] = annual_return(s) if len(s) >= 2 else np.nan
        rows.append(row)
    # Passing the columns explicitly keeps the frame well-formed (and
    # set_index from raising KeyError) when no year produced a row.
    return pd.DataFrame(rows, columns=["Year", *eq_df.columns]).set_index("Year")


def _summary_table(
    eq_df: pd.DataFrame, yr_df: pd.DataFrame, excess: pd.DataFrame
) -> pd.DataFrame:
    """Return full-period metrics per equity curve, sorted by CAGR descending.

    Columns with fewer than ``TRADING_DAYS`` non-NaN observations are skipped.
    """
    rows = []
    for col in eq_df.columns:
        eq = eq_df[col].dropna()
        if len(eq) < TRADING_DAYS:
            continue
        growth = cagr(eq)
        drawdown = max_dd(eq)
        if col in excess.columns:
            # Drop years without a defined excess so they are not counted as
            # losses (NaN > 0 evaluates to False).
            win_rate = (excess[col].dropna() > 0).mean() * 100
        else:
            win_rate = np.nan
        rows.append({
            "Strategy": col,
            "CAGR %": growth * 100,
            "Sharpe": sharpe(eq),
            "Sortino": sortino(eq),
            "Max DD %": drawdown * 100,
            "Calmar": _calmar(growth, drawdown),
            "Avg Ann Ret %": yr_df[col].mean() * 100 if col in yr_df.columns else np.nan,
            "Win Rate vs SPY": win_rate,
        })
    # Explicit columns keep sort_values from raising KeyError on an empty table.
    return pd.DataFrame(rows, columns=SUMMARY_COLUMNS).sort_values(
        "CAGR %", ascending=False
    )


def _report_improvement(eq_df: pd.DataFrame) -> None:
    """Print the best "(orig)" strategy against the best improved one by CAGR."""
    orig_cols = [c for c in eq_df.columns if "(orig)" in c]
    improved_cols = [c for c in eq_df.columns if c not in orig_cols and c != "SPY"]
    if not (orig_cols and improved_cols):
        return

    def column_cagr(col):
        return cagr(eq_df[col].dropna())

    best_orig = max(orig_cols, key=column_cagr)
    best_improved = max(improved_cols, key=column_cagr)

    orig_eq = eq_df[best_orig].dropna()
    imp_eq = eq_df[best_improved].dropna()

    # Compute each metric once and reuse it for both the lines and the deltas.
    orig_cagr, orig_sharpe, orig_dd = cagr(orig_eq), sharpe(orig_eq), max_dd(orig_eq)
    imp_cagr, imp_sharpe, imp_dd = cagr(imp_eq), sharpe(imp_eq), max_dd(imp_eq)

    print(f"\nBest original: {best_orig}")
    print(f" CAGR={orig_cagr*100:.2f}% Sharpe={orig_sharpe:.2f} "
          f"MaxDD={orig_dd*100:.2f}% Calmar={_calmar(orig_cagr, orig_dd):.2f}")
    print(f"\nBest improved: {best_improved}")
    print(f" CAGR={imp_cagr*100:.2f}% Sharpe={imp_sharpe:.2f} "
          f"MaxDD={imp_dd*100:.2f}% Calmar={_calmar(imp_cagr, imp_dd):.2f}")

    cagr_diff = (imp_cagr - orig_cagr) * 100
    sharpe_diff = imp_sharpe - orig_sharpe
    dd_diff = (imp_dd - orig_dd) * 100
    print(f"\nDelta: CAGR {cagr_diff:+.2f}pp Sharpe {sharpe_diff:+.2f} MaxDD {dd_diff:+.2f}pp")


def main():
    """Run every strategy, print the comparison tables, and save them as CSV."""
    # --- Load data ---
    universe = UNIVERSES["us"]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    all_tickers = sorted(set(tickers) | {benchmark})
    data = data_manager.update("us", all_tickers, with_open=False)
    # Keep only the tickers that actually came back as columns.
    tickers = [t for t in tickers if t in data.columns]
    top_n = max(5, len(tickers) // 10)

    print(f"Universe: {len(tickers)} stocks + {benchmark}. top_n={top_n}")
    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")

    # --- Build strategies ---
    strategies = _build_strategies(data, tickers, benchmark, top_n)

    # --- Run backtests ---
    # Presumably backtest returns a date-indexed equity Series; the code below
    # relies on that when assembling eq_df — verify against main.backtest.
    equity = {}
    for name, (strat, strat_data) in strategies.items():
        print(f"Running {name}...")
        equity[name] = backtest(strat, strat_data, initial_capital=INITIAL_CAPITAL)

    # SPY benchmark: buy-and-hold curve scaled to the same starting capital.
    bench = data[benchmark].dropna()
    equity["SPY"] = (bench / bench.iloc[0]) * INITIAL_CAPITAL

    eq_df = pd.DataFrame(equity).sort_index()

    # --- Yearly returns table ---
    yr_df = _yearly_returns(eq_df, range(FIRST_YEAR, LAST_YEAR + 1))

    # --- Print results ---
    _banner("YEARLY TOTAL RETURN (%)")
    print((yr_df * 100).round(2).to_string())

    # Excess over SPY
    excess = yr_df.sub(yr_df["SPY"], axis=0).drop(columns=["SPY"])
    _banner("EXCESS vs SPY (percentage points)")
    print((excess * 100).round(2).to_string())

    # --- Full-period summary ---
    _banner("FULL-PERIOD METRICS")
    summary = _summary_table(eq_df, yr_df, excess)
    # Scope the display tweaks to this print instead of mutating global options.
    with pd.option_context("display.max_columns", None, "display.width", 200):
        print(summary.round(2).to_string(index=False))

    # --- Comparison: Improved vs Original ---
    _banner("IMPROVEMENT ANALYSIS (best improved vs best original)")
    _report_improvement(eq_df)

    # --- Save results ---
    out_path = "data/strategy_improvement_results.csv"
    # Create the output directory up front; to_csv does not create parents.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    yr_df.to_csv(out_path)
    print(f"\nSaved yearly returns to {out_path}")

    summary_path = "data/strategy_improvement_summary.csv"
    Path(summary_path).parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(summary_path, index=False)
    print(f"Saved summary to {summary_path}")


if __name__ == "__main__":
    main()