# Extracted from patch b9a2a6a57bbd65cd74a22db8e07f3427990b3f36
# ("feat: add yearly sweep script for parameter optimization").
"""Run all US strategies and report yearly performance vs SPY.

The script backtests every configured strategy on the "us" universe,
normalizes the benchmark to the same starting capital, and prints:

* calendar-year total returns and excess return over the benchmark,
* the best strategy in each year,
* per-strategy averages across years,
* compound growth (CAGR), max drawdown and total return over each
  equity curve's full window.

The yearly return table is also written to ``OUTPUT_CSV``.
"""
from pathlib import Path

import pandas as pd
import numpy as np

import data_manager
from universe import UNIVERSES
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.buy_and_hold import BuyAndHoldStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.inverse_vol import InverseVolatilityStrategy
from strategies.mean_reversion import MeanReversionStrategy
from strategies.momentum import MomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
from main import backtest

# Starting capital used for every backtest and for the normalized benchmark.
INITIAL_CAPITAL = 10_000
# Inclusive range of calendar years reported in the yearly tables.
FIRST_YEAR = 2017
LAST_YEAR = 2026
# Destination of the yearly return table.
OUTPUT_CSV = "data/yearly_sweep.csv"
# Column label of the benchmark curve in every report table.
BENCHMARK_LABEL = "SPY"


def build_strategies(tickers, benchmark, data, top_n):
    """Return the strategies to sweep, keyed by display name.

    Each value is a ``(strategy, frame)`` pair: the strategy instance and
    the frame handed to ``backtest`` for it. "Multi-Factor" receives the
    whole ``data`` frame (it is also given ``benchmark``); every other
    strategy receives only the ``tickers`` columns.

    Args:
        tickers: Column labels of the tradable universe present in ``data``.
        benchmark: Benchmark column label, forwarded to the multi-factor
            strategy.
        data: Frame indexed by date with one column per symbol.
        top_n: Number of holdings for the ranking-based strategies.
    """
    # ``data[tickers]`` is evaluated per entry on purpose so every strategy
    # gets its own frame object rather than a shared one.
    return {
        "Buy & Hold (EW)": (BuyAndHoldStrategy(), data[tickers]),
        "Momentum": (
            MomentumStrategy(lookback=252, skip=21, top_n=top_n),
            data[tickers],
        ),
        "Inverse Volatility": (
            InverseVolatilityStrategy(vol_window=20),
            data[tickers],
        ),
        "Multi-Factor": (
            MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n),
            data,
        ),
        "Mean Reversion": (MeanReversionStrategy(top_n=top_n), data[tickers]),
        "Trend Following": (
            TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n),
            data[tickers],
        ),
        "Dual Momentum": (DualMomentumStrategy(top_n=top_n), data[tickers]),
        "Momentum+Quality": (
            MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n),
            data[tickers],
        ),
        "Mom+InvVol": (AdaptiveMomentumStrategy(top_n=top_n), data[tickers]),
        # Capped at ``top_n``, so this holds fewer than 20 names when the
        # universe-derived ``top_n`` is below 20.
        "Recovery+Mom Top20": (
            RecoveryMomentumStrategy(top_n=min(20, top_n)),
            data[tickers],
        ),
        "Recovery+Mom Top10": (RecoveryMomentumStrategy(top_n=10), data[tickers]),
    }


def annual_return(eq: pd.Series) -> float:
    """Return the total return from the first to the last value of ``eq``.

    Despite the name no annualization is applied; the result is an annual
    figure only when ``eq`` spans one year.
    """
    return eq.iloc[-1] / eq.iloc[0] - 1


def max_dd(eq: pd.Series) -> float:
    """Return the maximum drawdown of ``eq`` as a non-positive fraction."""
    return ((eq / eq.cummax()) - 1).min()


def yearly_returns(eq_df: pd.DataFrame, years) -> pd.DataFrame:
    """Compute calendar-year total returns for every equity curve.

    A year's return is measured from the last observation *before* the year
    (the prior year-end value) to the last observation inside it, so that the
    yearly figures compound to the full-window return. When a curve has no
    earlier observation, its first in-year value is the base instead.

    Args:
        eq_df: Equity curves, one column per strategy, sorted by a
            timezone-naive datetime index.
        years: Iterable of calendar years to report.

    Returns:
        Frame indexed by "Year" with one column per curve. Years in which no
        curve has data are omitted; a curve with no usable pair of values in
        a year gets NaN.
    """
    # Drop gaps once per curve instead of once per (year, curve) pair.
    curves = {col: eq_df[col].dropna() for col in eq_df.columns}

    rows = []
    for yr in years:
        start = pd.Timestamp(f"{yr}-01-01")
        stop = pd.Timestamp(f"{yr + 1}-01-01")
        row = {"Year": yr}
        any_data = False
        for col, curve in curves.items():
            in_year = curve.loc[(curve.index >= start) & (curve.index < stop)]
            if in_year.empty:
                row[col] = np.nan
                continue
            any_data = True
            earlier = curve.loc[curve.index < start]
            if earlier.empty:
                span = in_year
            else:
                # Prepend the prior year-end value as the base.
                span = pd.concat([earlier.iloc[-1:], in_year])
            row[col] = annual_return(span) if len(span) >= 2 else np.nan
        if any_data:
            rows.append(row)

    # Explicit columns keep ``set_index`` valid even when ``rows`` is empty.
    return pd.DataFrame(rows, columns=["Year", *eq_df.columns]).set_index("Year")


def _cagr(eq: pd.Series) -> float:
    """Return the compound annual growth rate of ``eq``, or NaN if undefined."""
    if len(eq) < 2:
        return np.nan
    yrs = (eq.index[-1] - eq.index[0]).days / 365.25
    if yrs <= 0:
        return np.nan
    return (eq.iloc[-1] / eq.iloc[0]) ** (1 / yrs) - 1


def compound_summary(eq_df: pd.DataFrame) -> pd.DataFrame:
    """Return CAGR, max drawdown and total return (all in %) per curve.

    Each curve is evaluated over its own non-missing window; rows are sorted
    by CAGR, highest first.
    """
    rows = []
    for col in eq_df.columns:
        curve = eq_df[col].dropna()
        total = annual_return(curve) if len(curve) else np.nan
        rows.append({
            "Strategy": col,
            "CAGR %": _cagr(curve) * 100,
            "Max DD %": max_dd(curve) * 100,
            "Total %": total * 100,
        })
    table = pd.DataFrame(rows, columns=["Strategy", "CAGR %", "Max DD %", "Total %"])
    return table.sort_values("CAGR %", ascending=False)


def main():
    """Run the sweep, print every report table and save the yearly returns."""
    universe = UNIVERSES["us"]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    all_tickers = sorted(set(tickers + [benchmark]))

    data = data_manager.update("us", all_tickers, with_open=False)
    # Keep only the symbols that actually came back as columns.
    tickers = [t for t in tickers if t in data.columns]
    top_n = max(5, len(tickers) // 10)
    print(f"Universe: {len(tickers)} stocks + {benchmark}. top_n={top_n}")
    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")

    strategies = build_strategies(tickers, benchmark, data, top_n)

    # NOTE(review): assumes ``backtest`` returns a date-indexed equity Series;
    # confirm against main.backtest.
    equity = {}
    for name, (strat, strat_data) in strategies.items():
        print(f"Running {name}...")
        equity[name] = backtest(strat, strat_data, initial_capital=INITIAL_CAPITAL)

    # Benchmark normalized to the same starting capital.
    bench = data[benchmark].dropna()
    equity[BENCHMARK_LABEL] = (bench / bench.iloc[0]) * INITIAL_CAPITAL

    eq_df = pd.DataFrame(equity).sort_index()

    # Yearly returns table
    yr_df = yearly_returns(eq_df, range(FIRST_YEAR, LAST_YEAR + 1))
    if yr_df.empty:
        print(f"\nNo equity data between {FIRST_YEAR} and {LAST_YEAR}; nothing to report.")
        return

    # Excess over SPY
    spy_ret = yr_df[BENCHMARK_LABEL]
    strat_only = yr_df.drop(columns=[BENCHMARK_LABEL])
    excess = strat_only.sub(spy_ret, axis=0)

    print("\n=== Yearly Total Return (%) ===")
    print((yr_df * 100).round(2).to_string())

    print("\n=== Excess vs SPY (pp) ===")
    print((excess * 100).round(2).to_string())

    # Best strategy each year (excluding SPY). Years where every strategy is
    # NaN are dropped first: idxmax on an all-NaN row is deprecated in pandas.
    ranked = strat_only.dropna(how="all")
    best_per_year = ranked.idxmax(axis=1)
    best_ret = ranked.max(axis=1)

    print("\n=== Best Strategy per Year ===")
    print(f"{'Year':<6}{'Strategy':<22}{'Return':>10}{'SPY':>10}{'Excess':>10}")
    for yr in best_per_year.index:
        s = best_per_year.loc[yr]
        r = best_ret.loc[yr]
        b = spy_ret.loc[yr]
        print(f"{yr:<6}{s:<22}{r*100:>9.2f}%{b*100:>9.2f}%{(r-b)*100:>9.2f}pp")

    # Average metrics per strategy
    print("\n=== Full-period Summary (across years) ===")
    summary = pd.DataFrame({
        "Avg Annual Return": strat_only.mean() * 100,
        "Median": strat_only.median() * 100,
        "Std": strat_only.std() * 100,
        "Years Beat SPY": strat_only.gt(spy_ret, axis=0).sum(),
        "Best Years": (best_per_year.value_counts()
                       .reindex(strat_only.columns, fill_value=0)),
    })
    summary = summary.sort_values("Avg Annual Return", ascending=False)
    print(summary.round(2).to_string())

    # Overall equity-curve CAGR (compound) across all available years
    print("\n=== Compound Over Full Window (CAGR, Max DD) ===")
    print(compound_summary(eq_df).round(2).to_string(index=False))

    # Best "average" strategy by mean yearly return. The mean covers every
    # reported year, including a partial first or last year.
    avg_returns = summary["Avg Annual Return"]
    if avg_returns.notna().any():
        best_avg = avg_returns.idxmax()
        print(f"\n>>> Best average strategy: {best_avg} "
              f"({summary.loc[best_avg, 'Avg Annual Return']:.2f}% avg annual return, "
              f"beat SPY in {int(summary.loc[best_avg, 'Years Beat SPY'])}/{len(strat_only)} years)")

    # Save CSV (create the output directory on a fresh checkout).
    Path(OUTPUT_CSV).parent.mkdir(parents=True, exist_ok=True)
    yr_df.to_csv(OUTPUT_CSV)
    print(f"\nSaved yearly returns to {OUTPUT_CSV}")


if __name__ == "__main__":
    main()