research: add strategy evaluation and exploration scripts

Add 28 research scripts covering DCA simulation, momentum evaluation, Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:53:19 +08:00
parent d086930ab3
commit 541f7bcf5b
28 changed files with 7062 additions and 0 deletions
--- a/research/strategy_improvement_r4.py
+++ b/research/strategy_improvement_r4.py
@@ -0,0 +1,174 @@
+"""
+Round 4 - Final iteration: Optimize the winning EnhFC strategy.
+
+Findings so far:
+- EnhFC Top10 mom20%: 45.8% CAGR, 1.27 Sharpe, -39.8% MaxDD, 1.15 Calmar
+- EnhFC Top15 mom20%: 40.6% CAGR, 1.25 Sharpe, -38.1% MaxDD, 1.07 Calmar
+
+Goal: Reduce MaxDD while preserving CAGR. Test:
+1. Tail protection variants (threshold / scale combinations)
+2. Top10 with tail protection
+3. Top12 as middle ground
+4. Different momentum weights
+"""
+
+import numpy as np
+import pandas as pd
+
+import data_manager
+from universe import UNIVERSES
+from main import backtest
+
+from strategies.factor_combo import FactorComboStrategy
+from strategies.recovery_momentum import RecoveryMomentumStrategy
+from strategies.ensemble_alpha import EnhancedFactorComboStrategy, EnsembleAlphaStrategy
+
+
+def annual_return(eq):
+    """Return the simple return of ``eq`` from its first to its last value.
+
+    Nothing is annualised here despite the name: the result is
+    ``last / first - 1`` over whatever span the caller passes in.
+    """
+    start_value = eq.iloc[0]
+    end_value = eq.iloc[-1]
+    return end_value / start_value - 1
+def max_dd(eq):
+    """Return the deepest peak-to-trough decline of ``eq`` as a fraction.
+
+    Each value is compared with the running maximum seen so far; the most
+    negative ratio is returned (``0`` when the series never falls below a
+    prior peak).
+    """
+    running_peak = eq.cummax()
+    drawdown = eq / running_peak - 1
+    return drawdown.min()
+def sharpe(eq):
+    """Return the annualised Sharpe ratio of ``eq`` with a zero risk-free rate.
+
+    Period-over-period returns are scaled with 252 periods per year.
+    Returns ``0`` when the return volatility is zero or undefined (for
+    example with fewer than two returns).
+    """
+    returns = eq.pct_change().dropna()
+    vol = returns.std()
+    # A NaN volatility also fails this comparison and falls through to 0.
+    if vol > 0:
+        return (returns.mean() * 252) / (vol * np.sqrt(252))
+    return 0
+def sortino(eq):
+    """Return the annualised Sortino ratio of ``eq`` with a zero target return.
+
+    Downside risk is the root-mean-square of the below-zero period returns
+    taken over *all* periods (gains count as zero shortfall). Using the
+    sample standard deviation of only the losing periods instead would
+    subtract their mean, ignore how often losses occur, and be undefined
+    when there is a single losing period.
+
+    Returns are scaled with 252 periods per year. Returns ``0`` when there
+    is no downside (or no returns at all).
+    """
+    d = eq.pct_change().dropna()
+    # Shortfall below the 0 target; non-negative returns contribute 0.
+    shortfall = d.clip(upper=0)
+    ds = np.sqrt((shortfall ** 2).mean()) * np.sqrt(252)
+    # An empty return series yields NaN here, which also falls through to 0.
+    return (d.mean() * 252) / ds if ds > 0 else 0
+def cagr(eq):
+    """Return the compound annual growth rate of ``eq``.
+
+    The elapsed time is the whole-day difference between the last and the
+    first index label divided by 365.25, so the index must support
+    subtraction yielding an object with ``.days``. Returns ``0`` when no
+    time has elapsed.
+    """
+    elapsed_days = (eq.index[-1] - eq.index[0]).days
+    years = elapsed_days / 365.25
+    if years > 0:
+        growth = eq.iloc[-1] / eq.iloc[0]
+        return growth ** (1 / years) - 1
+    return 0
+def calmar(eq):
+    """Return the Calmar ratio of ``eq``: CAGR over the absolute max drawdown.
+
+    Returns ``0`` when the series has no drawdown (``max_dd`` is not
+    negative), which also avoids dividing by zero.
+    """
+    worst_drawdown = max_dd(eq)
+    if worst_drawdown < 0:
+        return cagr(eq) / abs(worst_drawdown)
+    return 0
+
+
+def _build_strategies():
+    """Return the ``name -> strategy`` mapping compared in this round."""
+    return {
+        # Baselines
+        "FactorCombo (orig)": FactorComboStrategy(
+            signal_name="rec_mfilt+deep_upvol", rebal_freq=21, top_n=20
+        ),
+        "Recovery+Mom Top20": RecoveryMomentumStrategy(top_n=20),
+
+        # Winners from R3
+        "EnhFC Top10": EnhancedFactorComboStrategy(
+            top_n=10, mom_boost=0.2, tail_protection=False
+        ),
+        "EnhFC Top15": EnhancedFactorComboStrategy(
+            top_n=15, mom_boost=0.2, tail_protection=False
+        ),
+
+        # Top10 + tail protection variants
+        # NOTE(review): the label suggests a 15% threshold / 50% scale, but
+        # the strategy's default tail parameters are used -- confirm they match.
+        "EnhFC Top10 +Tail15/50": EnhancedFactorComboStrategy(
+            top_n=10, mom_boost=0.2, tail_protection=True
+        ),
+
+        # Top12 as middle ground
+        "EnhFC Top12": EnhancedFactorComboStrategy(
+            top_n=12, mom_boost=0.2, tail_protection=False
+        ),
+        "EnhFC Top12 mom15%": EnhancedFactorComboStrategy(
+            top_n=12, mom_boost=0.15, tail_protection=False
+        ),
+        "EnhFC Top12 mom25%": EnhancedFactorComboStrategy(
+            top_n=12, mom_boost=0.25, tail_protection=False
+        ),
+
+        # Ensemble variants
+        "Ensemble Top12": EnsembleAlphaStrategy(top_n=12, tail_protection=False),
+        "Ensemble Top10": EnsembleAlphaStrategy(top_n=10, tail_protection=False),
+        "Ensemble Top15 +Tail": EnsembleAlphaStrategy(
+            top_n=15, tail_protection=True, tail_threshold=-0.12, tail_scale=0.4
+        ),
+    }
+
+
+def _yearly_returns(eq_df):
+    """Return a ``Year x column`` frame of calendar-year returns.
+
+    Each year's return is measured from the previous year's last available
+    value to this year's last value, so the move across the year boundary
+    is not lost. The first year on record for a column has no prior close
+    and is measured from its first value instead (NaN with fewer than two
+    observations).
+    """
+    per_column = {}
+    for col in eq_df.columns:
+        series = eq_df[col].dropna()
+        returns = {}
+        prev_close = None
+        for yr, in_year in series.groupby(series.index.year):
+            if prev_close is not None:
+                returns[yr] = in_year.iloc[-1] / prev_close - 1
+            elif len(in_year) >= 2:
+                returns[yr] = annual_return(in_year)
+            else:
+                returns[yr] = np.nan
+            prev_close = in_year.iloc[-1]
+        per_column[col] = pd.Series(returns, dtype=float)
+
+    yr_df = pd.DataFrame(per_column).sort_index()
+    yr_df.index.name = "Year"
+    return yr_df
+
+
+def _full_period_metrics(eq_df, excess, min_obs=252):
+    """Return per-column metric tuples sorted by Calmar, best first.
+
+    Each tuple is ``(name, CAGR%, Sharpe, Sortino, MaxDD%, Calmar, "wins/total")``.
+    Columns with fewer than ``min_obs`` observations are skipped. The
+    win count only considers years in which both the column and the
+    benchmark have a return, so missing years do not inflate the total.
+    """
+    results = []
+    for col in eq_df.columns:
+        eq = eq_df[col].dropna()
+        if len(eq) < min_obs:
+            continue
+        if col in excess.columns:
+            comparable = excess[col].dropna()
+            wins = int((comparable > 0).sum())
+            total = len(comparable)
+        else:
+            # The benchmark itself has no excess-return column.
+            wins = total = 0
+        results.append((
+            col,
+            cagr(eq) * 100,
+            sharpe(eq),
+            sortino(eq),
+            max_dd(eq) * 100,
+            calmar(eq),
+            f"{wins}/{total}",
+        ))
+
+    results.sort(key=lambda x: -x[5])
+    return results
+
+
+def main():
+    """Backtest the round-4 strategy variants and print a comparison report.
+
+    Loads the "us" universe and its benchmark, runs every strategy from
+    ``_build_strategies`` through ``backtest``, then prints yearly returns,
+    full-period metrics sorted by Calmar, and the best strategy per
+    criterion. Nothing is returned.
+    """
+    initial_capital = 10_000
+
+    universe = UNIVERSES["us"]
+    tickers = universe["fetch"]()
+    benchmark = universe["benchmark"]
+    all_tickers = sorted(set(tickers + [benchmark]))
+
+    data = data_manager.update("us", all_tickers, with_open=False)
+    # Keep only tickers for which a column was actually returned.
+    tickers = [t for t in tickers if t in data.columns]
+
+    print(f"Universe: {len(tickers)} stocks, data: {data.index[0].date()} to {data.index[-1].date()}")
+
+    # Run
+    # NOTE(review): backtest() is assumed to return a date-indexed equity
+    # Series -- confirm against main.backtest.
+    equity = {}
+    for name, strat in _build_strategies().items():
+        print(f"  {name}...")
+        equity[name] = backtest(strat, data[tickers], initial_capital=initial_capital)
+
+    # NOTE(review): the benchmark curve starts at the first benchmark price,
+    # which may precede the strategies' first equity point -- confirm the
+    # full-period comparison is meant to span different windows.
+    bench = data[benchmark].dropna()
+    equity["SPY"] = (bench / bench.iloc[0]) * initial_capital
+    eq_df = pd.DataFrame(equity).sort_index()
+
+    # Yearly returns
+    yr_df = _yearly_returns(eq_df)
+    excess = yr_df.sub(yr_df["SPY"], axis=0).drop(columns=["SPY"])
+
+    print("\n" + "=" * 100)
+    print("YEARLY RETURNS (%)")
+    print("=" * 100)
+    print((yr_df * 100).round(1).to_string())
+
+    print("\n" + "=" * 100)
+    print("FULL-PERIOD METRICS (sorted by Calmar)")
+    print("=" * 100)
+    print(f"{'Strategy':<28s} {'CAGR%':>7s} {'Sharpe':>7s} {'Sortino':>8s} {'MaxDD%':>8s} {'Calmar':>7s} {'WinSPY':>7s}")
+    print("-" * 76)
+
+    results = _full_period_metrics(eq_df, excess)
+    for r in results:
+        print(f"{r[0]:<28s} {r[1]:>7.1f} {r[2]:>7.2f} {r[3]:>8.2f} {r[4]:>8.1f} {r[5]:>7.2f} {r[6]:>7s}")
+
+    # Highlight the best by different criteria
+    print("\n--- BEST BY CRITERIA ---")
+    if not results:
+        # max()/min() raise ValueError on an empty list.
+        print("  (no equity curve has enough history to rank)")
+        return
+    best_cagr = max(results, key=lambda x: x[1])
+    best_sharpe = max(results, key=lambda x: x[2])
+    best_calmar = max(results, key=lambda x: x[5])
+    best_dd = min(results, key=lambda x: abs(x[4]))
+    print(f"  Best CAGR:   {best_cagr[0]} ({best_cagr[1]:.1f}%)")
+    print(f"  Best Sharpe: {best_sharpe[0]} ({best_sharpe[2]:.2f})")
+    print(f"  Best Calmar: {best_calmar[0]} ({best_calmar[5]:.2f})")
+    print(f"  Best MaxDD:  {best_dd[0]} ({best_dd[4]:.1f}%)")
+
+
+# Run the comparison only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()