Add 28 research scripts covering DCA simulation, momentum evaluation, Sharpe optimization, trend rider analysis, and US fundamentals exploration.
289 lines
9.7 KiB
Python
289 lines
9.7 KiB
Python
"""
|
|
Comprehensive strategy improvement evaluation.

Compares original strategies against improved versions, showing:
- Yearly returns (2016-2026, for the years present in the data)
- Key metrics (CAGR, Sharpe, Sortino, MaxDD, Calmar)
- Excess over SPY
- Turnover analysis (helper provided; not yet part of the printed report)
|
|
"""
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
import data_manager
|
|
from universe import UNIVERSES
|
|
from main import backtest
|
|
|
|
# Original strategies
|
|
from strategies.momentum import MomentumStrategy
|
|
from strategies.recovery_momentum import RecoveryMomentumStrategy
|
|
from strategies.momentum_quality import MomentumQualityStrategy
|
|
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
|
|
from strategies.dual_momentum import DualMomentumStrategy
|
|
from strategies.trend_following import TrendFollowingStrategy
|
|
from strategies.multi_factor import MultiFactorStrategy
|
|
from strategies.factor_combo import FactorComboStrategy
|
|
|
|
# Improved strategies
|
|
from strategies.enhanced_recovery_momentum import EnhancedRecoveryMomentumStrategy
|
|
from strategies.improved_momentum_quality import ImprovedMomentumQualityStrategy
|
|
from strategies.composite_alpha import CompositeAlphaStrategy
|
|
|
|
|
|
def annual_return(eq: pd.Series) -> float:
    """Return the simple total return between the first and last points of ``eq``.

    Despite the name, nothing is annualised here: the result is just
    ``last / first - 1`` over whatever span ``eq`` covers. Callers slice the
    equity curve to a single calendar year before calling this.
    """
    closing_value = eq.iloc[-1]
    opening_value = eq.iloc[0]
    growth_factor = closing_value / opening_value
    return growth_factor - 1
|
|
|
|
|
|
def max_dd(eq: pd.Series) -> float:
    """Return the maximum drawdown of ``eq`` as a non-positive fraction.

    Each point is compared with the running peak seen so far; the most
    negative ``value / peak - 1`` is returned (0.0 if the curve never falls
    below a previous high).
    """
    running_peak = eq.cummax()
    drawdown = eq / running_peak - 1
    return drawdown.min()
|
|
|
|
|
|
def sharpe(eq: pd.Series) -> float:
    """Return the annualised Sharpe ratio of an equity curve (zero risk-free rate).

    Returns are the period-over-period percentage changes of ``eq``; mean and
    standard deviation are annualised with a factor of 252 periods.

    Returns 0.0 when the ratio is undefined: either the returns have zero
    volatility, or there are fewer than two returns so the sample standard
    deviation is NaN.
    """
    daily = eq.pct_change().dropna()
    vol = daily.std()
    # std() is NaN (not 0) for fewer than two observations; a plain `== 0`
    # check would let that NaN leak into the result.
    if not np.isfinite(vol) or vol == 0:
        return 0.0
    return (daily.mean() * 252) / (vol * np.sqrt(252))
|
|
|
|
|
|
def sortino(eq: pd.Series) -> float:
    """Return the annualised Sortino ratio of an equity curve.

    The numerator is the mean period return scaled by 252; the denominator is
    the sample standard deviation of the negative returns only, scaled by
    ``sqrt(252)``.

    Returns 0.0 when the downside deviation is zero or undefined. The latter
    happens when there are fewer than two negative returns, in which case the
    sample standard deviation is NaN.
    """
    daily = eq.pct_change().dropna()
    downside = daily[daily < 0].std() * np.sqrt(252)
    # With zero or one losing period std() is NaN, which `== 0` alone misses.
    if not np.isfinite(downside) or downside == 0:
        return 0.0
    return (daily.mean() * 252) / downside
|
|
|
|
|
|
def cagr(eq: pd.Series) -> float:
    """Return the compound annual growth rate of an equity curve.

    The elapsed time is taken from the first and last index labels (which must
    support subtraction yielding an object with a ``.days`` attribute) and
    converted to years using 365.25 days per year. A span of zero or negative
    length yields 0.0.
    """
    elapsed_days = (eq.index[-1] - eq.index[0]).days
    elapsed_years = elapsed_days / 365.25
    if elapsed_years <= 0:
        return 0.0
    growth_factor = eq.iloc[-1] / eq.iloc[0]
    return growth_factor ** (1 / elapsed_years) - 1
|
|
|
|
|
|
def turnover(weights: pd.DataFrame) -> float:
    """Average daily turnover.

    Turnover for a row is the sum of absolute weight changes versus the
    previous row. The first row has no predecessor, so it is excluded from the
    average rather than being counted as a zero-turnover day (``diff()`` makes
    it all-NaN, which ``sum`` would otherwise turn into 0.0).

    Returns NaN when ``weights`` has fewer than two rows, since no change can
    be observed.
    """
    per_row_turnover = weights.diff().abs().sum(axis=1)
    return per_row_turnover.iloc[1:].mean()
|
|
|
|
|
|
def _calmar(cagr_value: float, max_drawdown: float) -> float:
    """Return CAGR divided by |max drawdown|, or 0 when there is no drawdown."""
    return cagr_value / abs(max_drawdown) if max_drawdown < 0 else 0


def main():
    """Backtest original and improved strategies on the "us" universe and report.

    Steps:
      1. Load prices for the universe tickers plus the benchmark.
      2. Build the original and improved strategy instances.
      3. Run ``backtest`` for each with 10,000 starting capital and add a
         buy-and-hold benchmark curve under the key "SPY".
      4. Print yearly returns, yearly excess over SPY, full-period metrics and
         a best-improved vs best-original comparison.
      5. Write the yearly table and the summary table to CSV files in
         ``data/``.
    """
    import os  # local import: only needed to make sure the output dir exists

    # --- Load data ---
    universe = UNIVERSES["us"]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    all_tickers = sorted(set(tickers + [benchmark]))

    data = data_manager.update("us", all_tickers, with_open=False)
    # Keep only tickers for which a price column actually came back.
    tickers = [t for t in tickers if t in data.columns]
    top_n = max(5, len(tickers) // 10)

    print(f"Universe: {len(tickers)} stocks + {benchmark}. top_n={top_n}")
    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")

    # --- Build strategies ---
    # Each entry maps a display name to (strategy instance, price frame).
    # Strategies that take `benchmark` receive the full frame (benchmark
    # column included); the others receive the stock columns only.
    strategies = {
        # === ORIGINALS ===
        "Momentum (orig)": (
            MomentumStrategy(lookback=252, skip=21, top_n=top_n),
            data[tickers]
        ),
        "Recovery+Mom Top20 (orig)": (
            RecoveryMomentumStrategy(top_n=20),
            data[tickers]
        ),
        "Mom+Quality (orig)": (
            MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n),
            data[tickers]
        ),
        "Mom+InvVol (orig)": (
            AdaptiveMomentumStrategy(top_n=top_n),
            data[tickers]
        ),
        "Dual Momentum (orig)": (
            DualMomentumStrategy(top_n=top_n),
            data[tickers]
        ),
        "Trend Following (orig)": (
            TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n),
            data[tickers]
        ),
        "Multi-Factor (orig)": (
            MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n),
            data
        ),
        "FactorCombo rec+deep (orig)": (
            FactorComboStrategy(signal_name="rec_mfilt+deep_upvol", rebal_freq=21, top_n=20),
            data[tickers]
        ),

        # === IMPROVED ===
        "Enhanced RecMom Top20": (
            EnhancedRecoveryMomentumStrategy(
                recovery_window=63, mom_lookback=252, mom_skip=21,
                intermediate_mom=126, vol_window=60,
                rebal_freq=21, top_n=20, regime_scale=True
            ),
            data[tickers]
        ),
        "Enhanced RecMom Top30": (
            EnhancedRecoveryMomentumStrategy(
                recovery_window=63, mom_lookback=252, mom_skip=21,
                intermediate_mom=126, vol_window=60,
                rebal_freq=21, top_n=30, regime_scale=True
            ),
            data[tickers]
        ),
        "Improved MomQuality": (
            ImprovedMomentumQualityStrategy(
                momentum_period=252, skip=21, quality_window=252,
                recovery_window=63, vol_window=60, rebal_freq=21, top_n=20
            ),
            data[tickers]
        ),
        "Improved MomQuality Top30": (
            ImprovedMomentumQualityStrategy(
                momentum_period=252, skip=21, quality_window=252,
                recovery_window=63, vol_window=60, rebal_freq=21, top_n=30
            ),
            data[tickers]
        ),
        "Composite Alpha": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60,
                rebal_freq=10, top_n=20, regime_gate=True
            ),
            data
        ),
        "Composite Alpha Top30": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60,
                rebal_freq=10, top_n=30, regime_gate=True
            ),
            data
        ),
        "Composite Alpha NoRegime": (
            CompositeAlphaStrategy(
                tickers=tickers, benchmark=benchmark,
                recovery_window=63, intermediate_period=147, skip=21,
                quality_window=252, vol_window=60,
                rebal_freq=10, top_n=20, regime_gate=False
            ),
            data
        ),
    }

    # --- Run backtests ---
    equity = {}
    for name, (strat, strat_data) in strategies.items():
        print(f"Running {name}...")
        equity[name] = backtest(strat, strat_data, initial_capital=10_000)

    # SPY benchmark: buy-and-hold curve rebased to the same starting capital.
    bench = data[benchmark].dropna()
    equity["SPY"] = (bench / bench.iloc[0]) * 10_000

    eq_df = pd.DataFrame(equity).sort_index()

    # --- Yearly returns table ---
    years = list(range(2016, 2027))
    rows = []
    for yr in years:
        start = pd.Timestamp(f"{yr}-01-01")
        end = pd.Timestamp(f"{yr}-12-31")
        window = eq_df.loc[(eq_df.index >= start) & (eq_df.index <= end)].dropna(how="all")
        if window.empty:
            # No data at all for this calendar year: leave it out of the table.
            continue
        row = {"Year": yr}
        for col in eq_df.columns:
            s = window[col].dropna()
            # A return needs at least two points inside the year.
            row[col] = annual_return(s) if len(s) >= 2 else np.nan
        rows.append(row)

    yr_df = pd.DataFrame(rows).set_index("Year")

    # --- Print results ---
    print("\n" + "=" * 80)
    print("YEARLY TOTAL RETURN (%)")
    print("=" * 80)
    print((yr_df * 100).round(2).to_string())

    # Excess over SPY
    excess = yr_df.sub(yr_df["SPY"], axis=0).drop(columns=["SPY"])
    print("\n" + "=" * 80)
    print("EXCESS vs SPY (percentage points)")
    print("=" * 80)
    print((excess * 100).round(2).to_string())

    # --- Full-period summary ---
    print("\n" + "=" * 80)
    print("FULL-PERIOD METRICS")
    print("=" * 80)

    summary_columns = [
        "Strategy", "CAGR %", "Sharpe", "Sortino", "Max DD %",
        "Calmar", "Avg Ann Ret %", "Win Rate vs SPY",
    ]
    summary_rows = []
    for col in eq_df.columns:
        eq = eq_df[col].dropna()
        if len(eq) < 252:
            # Skip curves with fewer than 252 points.
            continue
        # Compute each metric once and reuse it for the derived columns.
        col_cagr = cagr(eq)
        col_dd = max_dd(eq)
        summary_rows.append({
            "Strategy": col,
            "CAGR %": col_cagr * 100,
            "Sharpe": sharpe(eq),
            "Sortino": sortino(eq),
            "Max DD %": col_dd * 100,
            "Calmar": _calmar(col_cagr, col_dd),
            "Avg Ann Ret %": yr_df[col].mean() * 100 if col in yr_df.columns else np.nan,
            "Win Rate vs SPY": (excess[col] > 0).mean() * 100 if col in excess.columns else np.nan,
        })

    # Passing explicit columns keeps sort_values from raising KeyError when no
    # curve was long enough to produce a summary row.
    summary = pd.DataFrame(summary_rows, columns=summary_columns).sort_values(
        "CAGR %", ascending=False
    )
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 200)
    print(summary.round(2).to_string(index=False))

    # --- Comparison: Improved vs Original ---
    print("\n" + "=" * 80)
    print("IMPROVEMENT ANALYSIS (best improved vs best original)")
    print("=" * 80)

    orig_cols = [c for c in eq_df.columns if "(orig)" in c]
    improved_cols = [c for c in eq_df.columns if c not in orig_cols and c != "SPY"]

    if orig_cols and improved_cols:
        # "Best" means highest full-period CAGR within each group.
        best_orig = max(orig_cols, key=lambda c: cagr(eq_df[c].dropna()))
        best_improved = max(improved_cols, key=lambda c: cagr(eq_df[c].dropna()))

        orig_eq = eq_df[best_orig].dropna()
        imp_eq = eq_df[best_improved].dropna()

        orig_cagr, orig_sharpe, orig_dd = cagr(orig_eq), sharpe(orig_eq), max_dd(orig_eq)
        imp_cagr, imp_sharpe, imp_dd = cagr(imp_eq), sharpe(imp_eq), max_dd(imp_eq)

        # Calmar goes through _calmar so a curve with no drawdown prints 0.00
        # here, matching the summary table, instead of dividing by zero.
        print(f"\nBest original: {best_orig}")
        print(f" CAGR={orig_cagr*100:.2f}% Sharpe={orig_sharpe:.2f} "
              f"MaxDD={orig_dd*100:.2f}% Calmar={_calmar(orig_cagr, orig_dd):.2f}")
        print(f"\nBest improved: {best_improved}")
        print(f" CAGR={imp_cagr*100:.2f}% Sharpe={imp_sharpe:.2f} "
              f"MaxDD={imp_dd*100:.2f}% Calmar={_calmar(imp_cagr, imp_dd):.2f}")

        cagr_diff = (imp_cagr - orig_cagr) * 100
        sharpe_diff = imp_sharpe - orig_sharpe
        dd_diff = (imp_dd - orig_dd) * 100
        print(f"\nDelta: CAGR {cagr_diff:+.2f}pp Sharpe {sharpe_diff:+.2f} MaxDD {dd_diff:+.2f}pp")

    # --- Save results ---
    out_path = "data/strategy_improvement_results.csv"
    # Make sure the target directory exists so to_csv cannot fail on a fresh
    # checkout.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    yr_df.to_csv(out_path)
    print(f"\nSaved yearly returns to {out_path}")

    summary_path = "data/strategy_improvement_summary.csv"
    summary.to_csv(summary_path, index=False)
    print(f"Saved summary to {summary_path}")
|
|
|
|
|
|
# Script entry point: run the full evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|