From b9a2a6a57bbd65cd74a22db8e07f3427990b3f36 Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Thu, 14 May 2026 12:53:32 +0800
Subject: [PATCH] feat: add yearly sweep script for parameter optimization

---
 yearly_sweep.py | 159 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 yearly_sweep.py

diff --git a/yearly_sweep.py b/yearly_sweep.py
new file mode 100644
index 0000000..3757547
--- /dev/null
+++ b/yearly_sweep.py
@@ -0,0 +1,159 @@
+"""Run all US strategies and report yearly performance vs SPY."""
+import pandas as pd
+import numpy as np
+
+import data_manager
+from universe import UNIVERSES
+from strategies.adaptive_momentum import AdaptiveMomentumStrategy
+from strategies.buy_and_hold import BuyAndHoldStrategy
+from strategies.dual_momentum import DualMomentumStrategy
+from strategies.inverse_vol import InverseVolatilityStrategy
+from strategies.mean_reversion import MeanReversionStrategy
+from strategies.momentum import MomentumStrategy
+from strategies.momentum_quality import MomentumQualityStrategy
+from strategies.multi_factor import MultiFactorStrategy
+from strategies.recovery_momentum import RecoveryMomentumStrategy
+from strategies.trend_following import TrendFollowingStrategy
+from main import backtest
+
+
+def build_strategies(tickers, benchmark, data, top_n):  # -> {display name: (strategy, frame to backtest on)}
+    return {  # dict order is the order main() runs the strategies in
+        "Buy & Hold (EW)":    (BuyAndHoldStrategy(),                                          data[tickers]),
+        "Momentum":           (MomentumStrategy(lookback=252, skip=21, top_n=top_n),          data[tickers]),
+        "Inverse Volatility": (InverseVolatilityStrategy(vol_window=20),                      data[tickers]),
+        "Multi-Factor":       (MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n), data),  # only entry given the unsliced frame
+        "Mean Reversion":     (MeanReversionStrategy(top_n=top_n),                            data[tickers]),
+        "Trend Following":    (TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n), data[tickers]),
+        "Dual Momentum":      (DualMomentumStrategy(top_n=top_n),                             data[tickers]),
+        "Momentum+Quality":   (MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n), data[tickers]),
+        "Mom+InvVol":         (AdaptiveMomentumStrategy(top_n=top_n),                         data[tickers]),
+        "Recovery+Mom Top20": (RecoveryMomentumStrategy(top_n=min(20, top_n)),                data[tickers]),
+        "Recovery+Mom Top10": (RecoveryMomentumStrategy(top_n=10),                            data[tickers]),  # NOTE(review): fixed 10, not capped by top_n like Top20 - confirm intended
+    }
+
+
+def annual_return(eq: pd.Series) -> float:  # simple return of the curve: last value over first, minus 1
+    return eq.iat[-1] / eq.iat[0] - 1  # not annualised; main() passes one calendar year at a time
+
+
+def max_dd(eq: pd.Series) -> float:  # lowest value of (curve / its running maximum - 1)
+    return eq.div(eq.cummax()).sub(1).min()  # ratio to the peak so far, minus 1, at its minimum
+
+
+def main():
+    """Backtest every US strategy and print calendar-year results against the benchmark.
+
+    Prints the yearly return table, the excess over the benchmark, the best strategy per
+    year, a per-strategy summary and full-window CAGR / max drawdown, then saves the
+    yearly table to ``data/yearly_sweep.csv``. Takes no arguments and returns ``None``.
+    """
+    import os  # local import: only used to create the output folder before saving
+
+    capital = 10_000
+    universe = UNIVERSES["us"]
+    tickers = universe["fetch"]()
+    benchmark = universe["benchmark"]
+    all_tickers = sorted(set(tickers + [benchmark]))
+
+    data = data_manager.update("us", all_tickers, with_open=False)
+    tickers = [t for t in tickers if t in data.columns]  # drop tickers with no column in data
+    top_n = max(5, len(tickers) // 10)
+    print(f"Universe: {len(tickers)} stocks + {benchmark}. top_n={top_n}")
+    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")
+
+    strategies = build_strategies(tickers, benchmark, data, top_n)
+
+    equity = {}
+    for name, (strat, strat_data) in strategies.items():
+        print(f"Running {name}...")
+        equity[name] = backtest(strat, strat_data, initial_capital=capital)
+
+    # Benchmark curve rebased to the same starting capital; its column is always "SPY".
+    bench = data[benchmark].dropna()
+    equity["SPY"] = (bench / bench.iloc[0]) * capital
+    eq_df = pd.DataFrame(equity).sort_index()
+
+    # One row per calendar year from 2017 on. The last year is read from the data instead
+    # of a fixed 2026, so later years are not silently left out of the report.
+    rows = []
+    for yr in range(2017, eq_df.index[-1].year + 1):
+        start, end = pd.Timestamp(f"{yr}-01-01"), pd.Timestamp(f"{yr}-12-31")
+        window = eq_df.loc[(eq_df.index >= start) & (eq_df.index <= end)].dropna(how="all")
+        if window.empty:
+            continue
+        row = {"Year": yr}
+        for col in eq_df.columns:
+            s = window[col].dropna()
+            # A return needs at least two points inside the year.
+            row[col] = annual_return(s) if len(s) >= 2 else np.nan
+        rows.append(row)
+    yr_df = pd.DataFrame(rows).set_index("Year")
+
+    excess = yr_df.sub(yr_df["SPY"], axis=0).drop(columns=["SPY"])
+
+    print("\n=== Yearly Total Return (%) ===")
+    print((yr_df * 100).round(2).to_string())
+
+    print("\n=== Excess vs SPY (pp) ===")
+    print((excess * 100).round(2).to_string())
+
+    # Rank strategies only (benchmark excluded). Years where no strategy has a return are
+    # kept out of the ranking: idxmax has no winner to pick from an all-NaN row.
+    strat_only = yr_df.drop(columns=["SPY"])
+    spy_ret = yr_df["SPY"]
+    ranked = strat_only.dropna(how="all")
+    best_per_year = ranked.idxmax(axis=1)
+    best_ret = ranked.max(axis=1)
+
+    print("\n=== Best Strategy per Year ===")
+    print(f"{'Year':<6}{'Strategy':<22}{'Return':>10}{'SPY':>10}{'Excess':>10}")
+    for yr, s in best_per_year.items():
+        r = best_ret.loc[yr]
+        b = spy_ret.loc[yr]
+        print(f"{yr:<6}{s:<22}{r*100:>9.2f}%{b*100:>9.2f}%{(r-b)*100:>9.2f}pp")
+
+    # Per-strategy statistics over the calendar-year returns.
+    print("\n=== Full-period Summary (across years) ===")
+    summary = pd.DataFrame({
+        "Avg Annual Return": strat_only.mean() * 100,
+        "Median": strat_only.median() * 100,
+        "Std": strat_only.std() * 100,
+        "Years Beat SPY": strat_only.gt(spy_ret, axis=0).sum(),
+        "Best Years": best_per_year.value_counts().reindex(strat_only.columns, fill_value=0),
+    })
+    summary = summary.sort_values("Avg Annual Return", ascending=False)
+    print(summary.round(2).to_string())
+
+    # Compound growth of each curve between its own first and last available points.
+    print("\n=== Compound Over Full Window (CAGR, Max DD) ===")
+    cagr_rows = []
+    for c in eq_df.columns:
+        s = eq_df[c].dropna()
+        growth = s.iloc[-1] / s.iloc[0]
+        yrs = (s.index[-1] - s.index[0]).days / 365.25
+        cagr = growth ** (1 / yrs) - 1 if yrs > 0 else np.nan  # no span, no annualised rate
+        cagr_rows.append({
+            "Strategy": c,
+            "CAGR %": cagr * 100,
+            "Max DD %": max_dd(s) * 100,
+            "Total %": (growth - 1) * 100,
+        })
+    cagr_df = pd.DataFrame(cagr_rows).sort_values("CAGR %", ascending=False)
+    print(cagr_df.round(2).to_string(index=False))
+
+    # Headline: the strategy with the highest mean calendar-year return.
+    best_avg = summary["Avg Annual Return"].idxmax()
+    print(f"\n>>> Best average strategy: {best_avg} "
+          f"({summary.loc[best_avg, 'Avg Annual Return']:.2f}% avg annual return, "
+          f"beat SPY in {int(summary.loc[best_avg, 'Years Beat SPY'])}/{len(strat_only)} years)")
+
+    # Save the yearly table; create the folder first so to_csv does not fail when it is missing.
+    out = "data/yearly_sweep.csv"
+    os.makedirs(os.path.dirname(out), exist_ok=True)
+    yr_df.to_csv(out)
+    print(f"\nSaved yearly returns to {out}")
+
+
+if __name__ == "__main__":  # run the sweep only when executed as a script, not on import
+    main()