feat: add yearly sweep script for parameter optimization

This commit is contained in:
2026-05-14 12:53:32 +08:00
parent 24663ebd35
commit b9a2a6a57b

159
yearly_sweep.py Normal file
View File

@@ -0,0 +1,159 @@
"""Run all US strategies and report yearly performance vs SPY."""
from pathlib import Path

import numpy as np
import pandas as pd

import data_manager
from main import backtest
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.buy_and_hold import BuyAndHoldStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.inverse_vol import InverseVolatilityStrategy
from strategies.mean_reversion import MeanReversionStrategy
from strategies.momentum import MomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
from universe import UNIVERSES
def build_strategies(tickers, benchmark, data, top_n):
    """Assemble the strategy line-up used by the sweep.

    Returns a dict mapping a display name to a ``(strategy, frame)`` pair.
    Every strategy is paired with ``data[tickers]`` except "Multi-Factor",
    which is constructed with the benchmark symbol and paired with the full
    ``data`` object. Insertion order is the reporting order.

    ``top_n`` is forwarded to the ranking strategies; "Recovery+Mom Top20"
    caps it at 20 and "Recovery+Mom Top10" always uses 10.
    """

    def stocks_only():
        # A fresh selection per strategy, exactly as many lookups as entries.
        return data[tickers]

    lineup = {}
    lineup["Buy & Hold (EW)"] = (BuyAndHoldStrategy(), stocks_only())
    lineup["Momentum"] = (
        MomentumStrategy(lookback=252, skip=21, top_n=top_n),
        stocks_only(),
    )
    lineup["Inverse Volatility"] = (
        InverseVolatilityStrategy(vol_window=20),
        stocks_only(),
    )
    lineup["Multi-Factor"] = (
        MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n),
        data,
    )
    lineup["Mean Reversion"] = (MeanReversionStrategy(top_n=top_n), stocks_only())
    lineup["Trend Following"] = (
        TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n),
        stocks_only(),
    )
    lineup["Dual Momentum"] = (DualMomentumStrategy(top_n=top_n), stocks_only())
    lineup["Momentum+Quality"] = (
        MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n),
        stocks_only(),
    )
    lineup["Mom+InvVol"] = (AdaptiveMomentumStrategy(top_n=top_n), stocks_only())
    lineup["Recovery+Mom Top20"] = (
        RecoveryMomentumStrategy(top_n=min(20, top_n)),
        stocks_only(),
    )
    lineup["Recovery+Mom Top10"] = (RecoveryMomentumStrategy(top_n=10), stocks_only())
    return lineup
def annual_return(eq: pd.Series) -> float:
    """Return the total return between the first and last points of *eq*.

    Despite the name, nothing is annualised: the result is simply
    ``last / first - 1`` over whatever span *eq* covers, so the caller decides
    the period by slicing the series beforehand.

    Returns NaN for an empty series instead of raising ``IndexError``; a
    single-point series yields ``0.0``.
    """
    if eq.empty:
        return np.nan
    return eq.iloc[-1] / eq.iloc[0] - 1
def max_dd(eq: pd.Series) -> float:
    """Return the deepest drawdown of *eq* relative to its running peak.

    The value is the minimum of ``eq / running_peak - 1``, i.e. ``0.0`` for a
    curve that never falls and ``-0.5`` for one that halves from its high.
    """
    running_peak = eq.cummax()
    drawdown = eq / running_peak - 1
    return drawdown.min()
def _yearly_returns(eq_df, years):
    """Build a Year-indexed table of calendar-year returns for every column.

    Each return is measured from the last value available before 1 January
    (the prior year-end close) to the last value inside the year, so the first
    trading day's move is included and the yearly figures compound to the
    full-period return. A column with no earlier history falls back to its
    first in-year value and then needs at least two in-year points. Years with
    no rows at all are skipped; the result may therefore be empty.
    """
    rows = []
    for yr in years:
        start = pd.Timestamp(f"{yr}-01-01")
        # Exclusive upper bound keeps intraday-stamped rows on 31 December.
        next_start = pd.Timestamp(f"{yr + 1}-01-01")
        in_year = eq_df.loc[(eq_df.index >= start) & (eq_df.index < next_start)]
        in_year = in_year.dropna(how="all")
        if in_year.empty:
            continue
        history = eq_df.loc[eq_df.index < start]
        row = {"Year": yr}
        for col in eq_df.columns:
            inside = in_year[col].dropna()
            before = history[col].dropna()
            if inside.empty:
                row[col] = np.nan
            elif not before.empty:
                row[col] = inside.iloc[-1] / before.iloc[-1] - 1
            elif len(inside) >= 2:
                row[col] = annual_return(inside)
            else:
                row[col] = np.nan
        rows.append(row)
    # Explicit columns keep set_index working even when no year produced a row.
    return pd.DataFrame(rows, columns=["Year", *eq_df.columns]).set_index("Year")


def _cagr(curve):
    """Return the compound annual growth rate of *curve*, or NaN if undefined."""
    if len(curve) < 2:
        return np.nan
    yrs = (curve.index[-1] - curve.index[0]).days / 365.25
    if yrs <= 0:
        return np.nan
    return (curve.iloc[-1] / curve.iloc[0]) ** (1 / yrs) - 1


def _compound_row(name, curve):
    """Return one row of the full-window table (CAGR, max drawdown, total)."""
    if curve.empty:
        # An all-NaN equity column has no first/last value to compare.
        return {"Strategy": name, "CAGR %": np.nan, "Max DD %": np.nan, "Total %": np.nan}
    return {
        "Strategy": name,
        "CAGR %": _cagr(curve) * 100,
        "Max DD %": max_dd(curve) * 100,
        "Total %": (curve.iloc[-1] / curve.iloc[0] - 1) * 100,
    }


def main():
    """Backtest every US strategy and print yearly results against the benchmark.

    Steps: load prices for the "us" universe, run each strategy from
    ``build_strategies`` through ``backtest`` with 10,000 starting capital,
    add the benchmark rebased to 10,000 under the column name "SPY", then
    print yearly returns, excess over SPY, the best strategy per year, a
    per-strategy summary and full-window CAGR / max drawdown. The yearly
    table is written to ``data/yearly_sweep.csv`` (relative to the working
    directory).

    NOTE(review): ``backtest`` is assumed to return a date-indexed equity
    Series -- confirm against ``main.backtest``.
    """
    universe = UNIVERSES["us"]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    all_tickers = sorted(set(tickers + [benchmark]))
    data = data_manager.update("us", all_tickers, with_open=False)
    # Keep only the symbols for which price data actually came back.
    tickers = [t for t in tickers if t in data.columns]
    top_n = max(5, len(tickers) // 10)
    print(f"Universe: {len(tickers)} stocks + {benchmark}. top_n={top_n}")
    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")

    strategies = build_strategies(tickers, benchmark, data, top_n)
    equity = {}
    for name, (strat, strat_data) in strategies.items():
        print(f"Running {name}...")
        equity[name] = backtest(strat, strat_data, initial_capital=10_000)

    # Benchmark rebased to the same starting capital; the column is always
    # labelled "SPY" whatever the configured benchmark symbol is.
    bench = data[benchmark].dropna()
    equity["SPY"] = (bench / bench.iloc[0]) * 10_000
    eq_df = pd.DataFrame(equity).sort_index()

    # Fixed reporting span; years without data are skipped by the helper.
    yr_df = _yearly_returns(eq_df, range(2017, 2027))
    spy_ret = yr_df["SPY"]
    strat_only = yr_df.drop(columns=["SPY"])
    # Years in which no strategy has a return cannot be ranked, and idxmax on
    # an all-NaN row is deprecated in recent pandas, so leave them out.
    ranked = strat_only.dropna(how="all")
    if ranked.empty:
        print("\nNo yearly strategy returns could be computed; nothing to report.")
        return

    excess = yr_df.sub(spy_ret, axis=0).drop(columns=["SPY"])
    print("\n=== Yearly Total Return (%) ===")
    print((yr_df * 100).round(2).to_string())
    print("\n=== Excess vs SPY (pp) ===")
    print((excess * 100).round(2).to_string())

    best_per_year = ranked.idxmax(axis=1)
    best_ret = ranked.max(axis=1)
    print("\n=== Best Strategy per Year ===")
    print(f"{'Year':<6}{'Strategy':<22}{'Return':>10}{'SPY':>10}{'Excess':>10}")
    for yr, s in best_per_year.items():
        r = best_ret.loc[yr]
        b = spy_ret.loc[yr]
        print(f"{yr:<6}{s:<22}{r*100:>9.2f}%{b*100:>9.2f}%{(r-b)*100:>9.2f}pp")

    print("\n=== Full-period Summary (across years) ===")
    summary = pd.DataFrame({
        "Avg Annual Return": strat_only.mean() * 100,
        "Median": strat_only.median() * 100,
        "Std": strat_only.std() * 100,
        "Years Beat SPY": strat_only.gt(spy_ret, axis=0).sum(),
        # Strategies that never win a year still get a 0 entry.
        "Best Years": best_per_year.value_counts().reindex(
            strat_only.columns, fill_value=0
        ),
    })
    summary = summary.sort_values("Avg Annual Return", ascending=False)
    print(summary.round(2).to_string())

    print("\n=== Compound Over Full Window (CAGR, Max DD) ===")
    cagr_rows = [_compound_row(c, eq_df[c].dropna()) for c in eq_df.columns]
    cagr_df = pd.DataFrame(cagr_rows).sort_values("CAGR %", ascending=False)
    print(cagr_df.round(2).to_string(index=False))

    # Headline pick: highest mean of the yearly returns, not highest CAGR.
    best_avg = summary["Avg Annual Return"].idxmax()
    print(f"\n>>> Best average strategy: {best_avg} "
          f"({summary.loc[best_avg, 'Avg Annual Return']:.2f}% avg annual return, "
          f"beat SPY in {int(summary.loc[best_avg, 'Years Beat SPY'])}/{len(strat_only)} years)")

    out = "data/yearly_sweep.csv"
    # to_csv does not create missing directories, so make sure it exists.
    Path(out).parent.mkdir(parents=True, exist_ok=True)
    yr_df.to_csv(out)
    print(f"\nSaved yearly returns to {out}")
# Run the sweep only when executed as a script, not when imported.
if __name__ == "__main__":
    main()