Files
quant/research/strategy_sharpe_boost.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

292 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Sharpe boost research: blend pure momentum into the Ensemble signal.
Root cause of Sharpe=1.32 (not 1.5+):
- 2021: recovery signals returned +3% vs SPY +30.5%
- In low-vol steady uptrends, "bouncing from bottom" stocks don't exist
- Pure 12-1 momentum captures "steady grinders" that do well in these regimes
Approach: Add a 3rd signal (pure momentum rank) to the ensemble with weight α,
reducing existing signals to (1-α)/2 each.
Test α in {0.15, 0.20, 0.25, 0.30, 0.35, 0.40} against the α = 0 baseline and pick
the one that maximizes Sharpe without materially hurting CAGR.
Also test: market-DD dampener ON TOP of the blended signal (risk-managed version).
"""
from __future__ import annotations
import os
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from strategies.base import Strategy
def _rank(df):
    """Return the row-wise percentile rank of every cell in ``df``.

    Ranking runs across the columns of each row, so each value is compared
    with the other values in the same row.  Ranks are expressed as fractions
    of the number of non-missing entries in that row, and missing entries
    stay missing instead of being assigned a rank.
    """
    ranked = df.rank(axis="columns", pct=True, na_option="keep")
    return ranked
class MomentumBlendEnsembleStrategy(Strategy):
    """
    Ensemble of 3 signals: rec_mfilt+deep_upvol, recovery63+mom, pure momentum.

    The pure momentum signal provides diversification in low-vol steady trends.

    The input frame is processed with time running down the rows (rolling
    windows and shifts) and the ranked cross-section running across the
    columns.  It is presumably a date x ticker price table -- confirm against
    the caller.

    Parameters
    ----------
    rebal_freq:
        Number of rows between two rebalances (must be >= 1).
    top_n:
        Number of highest-scoring columns held, equally weighted (>= 1).
    mom_blend:
        Weight alpha on the pure momentum signal, in [0, 1]; the two
        recovery signals each receive (1 - alpha) / 2.
    dd_floor:
        Lower bound of the drawdown exposure multiplier.
    dd_denom:
        Market drawdown depth at which the unclipped multiplier would reach
        zero (multiplier = 1 + drawdown / dd_denom).  Must be > 0 when
        ``risk_managed`` is true.
    risk_managed:
        Whether to scale the weights by the market-drawdown dampener.

    Raises
    ------
    ValueError
        If a parameter is outside the ranges listed above.
    """

    # First row index at which the 12-1 momentum (21-row lag + 231-row
    # change) is defined; nothing is held before it.
    _WARMUP = 252

    def __init__(
        self,
        rebal_freq: int = 21,
        top_n: int = 10,
        mom_blend: float = 0.30,  # weight on pure momentum signal
        dd_floor: float = 0.40,
        dd_denom: float = 0.20,
        risk_managed: bool = True,
    ):
        # Fail fast on values that would otherwise silently produce an empty
        # or NaN-filled book instead of raising.
        if rebal_freq < 1:
            raise ValueError(f"rebal_freq must be >= 1, got {rebal_freq!r}")
        if top_n < 1:
            raise ValueError(f"top_n must be >= 1, got {top_n!r}")
        if not 0.0 <= mom_blend <= 1.0:
            raise ValueError(f"mom_blend must be in [0, 1], got {mom_blend!r}")
        if risk_managed and dd_denom <= 0:
            # dd_denom == 0 gives 0/0 = NaN at every market high, which would
            # propagate into the weights.
            raise ValueError(f"dd_denom must be > 0, got {dd_denom!r}")

        self.rebal_freq = rebal_freq
        self.top_n = top_n
        self.mom_blend = mom_blend
        self.dd_floor = dd_floor
        self.dd_denom = dd_denom
        self.risk_managed = risk_managed

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a frame of portfolio weights with the same shape as ``data``.

        Steps: blend the three ranked signals, equal-weight the ``top_n``
        columns per row, hold those weights between rebalances, lag them by
        one row, and (optionally) scale them by the market-drawdown dampener.
        """
        ensemble = self._ensemble_score(data)
        target = self._top_n_equal_weight(ensemble)
        signals = self._hold_between_rebalances(target, data.index)
        if self.risk_managed:
            signals = signals.mul(self._drawdown_scale(data), axis=0)
        return signals

    def _ensemble_score(self, data: pd.DataFrame) -> pd.DataFrame:
        """Blend signals A, B and C into one cross-sectional score."""
        p = data

        # === Signal A: rec_mfilt + deep_upvol ===
        # Rebound from the 126-row low, kept only where the lagged
        # 105-row change is positive.
        rec_126 = p / p.rolling(126, min_periods=126).min() - 1
        mom_filter = p.shift(21).pct_change(105)
        rec_mfilt_r = _rank(rec_126.where(mom_filter > 0, np.nan))
        # "up_vol" is the sum of the positive one-row returns over 20 rows.
        ret = p.pct_change()
        up_vol = ret.where(ret > 0, 0).rolling(20, min_periods=15).sum()
        deep_upvol_r = _rank(_rank(rec_126) * _rank(up_vol))
        signal_a = 0.5 * rec_mfilt_r + 0.5 * deep_upvol_r

        # === Signal B: Recovery 63d + 12-1 momentum ===
        rec_63 = p / p.rolling(63, min_periods=63).min() - 1
        mom_r = _rank(p.shift(21).pct_change(231))
        signal_b = 0.5 * _rank(rec_63) + 0.5 * mom_r

        # === Signal C: Pure 12-1 momentum (diversification in melt-ups) ===
        signal_c = mom_r

        # === Ensemble: weighted average ===
        alpha = self.mom_blend
        side_weight = (1 - alpha) / 2.0
        return side_weight * signal_a + side_weight * signal_b + alpha * signal_c

    def _top_n_equal_weight(self, ensemble: pd.DataFrame) -> pd.DataFrame:
        """Equal-weight the ``top_n`` best-scoring columns of every row."""
        rank = ensemble.rank(axis=1, ascending=False, na_option="bottom")
        # Rows with fewer than top_n scored columns hold nothing at all.
        enough = ensemble.notna().sum(axis=1) >= self.top_n
        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)
        raw = top_mask.astype(float)
        # A zero row sum becomes NaN so the division cannot hit 0/0; the
        # resulting NaN weights are then zero-filled.
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        return raw.div(row_sums, axis=0).fillna(0.0)

    def _hold_between_rebalances(
        self, signals: pd.DataFrame, index: pd.Index
    ) -> pd.DataFrame:
        """Keep weights only on rebalance rows, carry them forward, lag one row."""
        warmup = self._WARMUP
        rebal_mask = pd.Series(False, index=index)
        rebal_indices = list(range(warmup, len(index), self.rebal_freq))
        rebal_mask.iloc[rebal_indices] = True
        # Blank every non-rebalance row so ffill carries the last rebalance.
        signals[~rebal_mask] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0
        # PIT: a weight computed from row t only takes effect on row t + 1.
        return signals.shift(1).fillna(0.0)

    def _drawdown_scale(self, data: pd.DataFrame) -> pd.Series:
        """Return the lagged exposure multiplier of the market-DD dampener.

        The "market" is the equal-weighted mean of all column returns.
        """
        daily_rets = data.pct_change().fillna(0.0)
        mkt_rets = daily_rets.mean(axis=1)
        mkt_eq = (1 + mkt_rets).cumprod()
        mkt_dd = mkt_eq / mkt_eq.cummax() - 1
        dd_scale = (1.0 + mkt_dd / self.dd_denom).clip(
            lower=self.dd_floor, upper=1.0
        )
        # Lag by one row so the scale only uses drawdown known beforehand.
        return dd_scale.shift(1).fillna(1.0)
# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
def compute_metrics(daily_rets: pd.Series) -> dict:
    """Compute standard performance metrics from daily returns.

    Parameters
    ----------
    daily_rets:
        Simple per-period returns; 252 periods are treated as one year.

    Returns
    -------
    dict
        Keys ``cagr``, ``vol``, ``sharpe``, ``max_dd`` (<= 0) and ``calmar``.
        ``sharpe`` is 0.0 when the return standard deviation is not positive,
        ``calmar`` is 0.0 when there is no drawdown, and every metric is 0.0
        for an empty series.
    """
    periods_per_year = 252

    # An empty evaluation window has no final equity value to annualize;
    # report flat metrics instead of failing on eq.iloc[-1].
    if daily_rets.empty:
        return {"cagr": 0.0, "vol": 0.0, "sharpe": 0.0, "max_dd": 0.0, "calmar": 0.0}

    eq = (1 + daily_rets).cumprod()
    n_years = len(daily_rets) / float(periods_per_year)
    cagr = eq.iloc[-1] ** (1.0 / n_years) - 1.0

    annualizer = np.sqrt(periods_per_year)
    daily_std = daily_rets.std()
    vol = daily_std * annualizer
    # A NaN standard deviation (single observation) also lands in the
    # fallback, because NaN > 0 is False.
    sharpe = daily_rets.mean() / daily_std * annualizer if daily_std > 0 else 0.0

    max_dd = (eq / eq.cummax() - 1).min()
    calmar = cagr / abs(max_dd) if max_dd != 0 else 0.0

    return {
        "cagr": cagr,
        "vol": vol,
        "sharpe": sharpe,
        "max_dd": max_dd,
        "calmar": calmar,
    }
def yearly_returns(daily_rets: pd.Series) -> pd.Series:
    """Compute annual returns.

    Each calendar year's return is the compounded product of that year's
    daily returns, which equals the year-over-year change of the equity
    curve (the first year is measured from a starting equity of 1).

    Parameters
    ----------
    daily_rets:
        Simple daily returns indexed by a ``DatetimeIndex``.

    Returns
    -------
    pd.Series
        Compounded return per calendar year, indexed by the integer year.
        Empty when ``daily_rets`` is empty.
    """
    # Nothing to group; also avoids touching ``.index.year`` on an empty
    # non-datetime index.
    if daily_rets.empty:
        return pd.Series(dtype=float)

    # Grouping by the index year needs no frequency alias, so this does not
    # depend on which year-end alias the installed pandas accepts.
    growth = 1 + daily_rets
    return growth.groupby(daily_rets.index.year).prod() - 1
# Process-wide cache so the data set is refreshed and loaded only once per run.
_DATA_CACHE = {}


def backtest_strategy(strategy, start="2016-04-01", end="2026-05-13"):
    """Run backtest and return daily portfolio returns.

    Parameters
    ----------
    strategy:
        Object exposing ``generate_signals(data)`` that returns a weight
        frame aligned with ``data``.
    start, end:
        Inclusive label bounds of the evaluation window.  Signals are always
        generated on the full data set and only the resulting returns are
        trimmed.

    Returns
    -------
    pd.Series
        Daily portfolio return: the row-wise sum of weight times asset return.

    Raises
    ------
    RuntimeError
        If the data manager returns no data.
    """
    data = _DATA_CACHE.get("data")
    if data is None:
        # Project-local imports are deferred to the first cache miss.
        import data_manager
        from universe import get_sp500

        # presumably refreshes the local "us" store for the S&P 500 tickers
        # before loading it -- confirm against data_manager.
        data_manager.update("us", get_sp500())
        data = data_manager.load("us")
        if data is None:
            raise RuntimeError("No data loaded")
        # Cache only a successful load, so a failed attempt is retried on the
        # next call instead of failing permanently.
        _DATA_CACHE["data"] = data

    weights = strategy.generate_signals(data)
    daily_rets = (weights * data.pct_change().fillna(0.0)).sum(axis=1)
    # Trim to evaluation period
    return daily_rets.loc[start:end]
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator != 0 else float("nan")


def _sweep_blends(blends, risk_managed):
    """Backtest one strategy per blend weight and print a metrics table.

    Returns a dict keyed by blend weight; each value holds the daily returns
    under ``"rets"`` and the metrics dict under ``"metrics"``.
    """
    print(f"{'blend':>6s} {'CAGR':>7s} {'Vol':>7s} {'Sharpe':>7s} {'MaxDD':>7s} {'Calmar':>7s}")
    print("-" * 50)
    results = {}
    for alpha in blends:
        strat = MomentumBlendEnsembleStrategy(
            top_n=10, mom_blend=alpha, risk_managed=risk_managed
        )
        rets = backtest_strategy(strat)
        m = compute_metrics(rets)
        results[alpha] = {"rets": rets, "metrics": m}
        print(
            f"{alpha:>6.2f} {m['cagr']*100:>6.1f}% {m['vol']*100:>6.1f}% "
            f"{m['sharpe']:>7.2f} {m['max_dd']*100:>6.1f}% {m['calmar']:>7.2f}"
        )
    return results


def main():
    """Sweep the momentum blend weight and report on the best configuration.

    Prints, in order: the sweep without and with the drawdown dampener, the
    yearly returns of the highest-Sharpe risk-managed blend, an IS/OOS
    split, block-bootstrap statistics, and a comparison against the
    zero-blend baseline.
    """
    print("=" * 80)
    print("SHARPE BOOST: Momentum blend into Ensemble signal")
    print("=" * 80)

    # --- Parameter sweep: mom_blend ---
    # 0.0 must stay in the list: it is the baseline read back further down.
    blends = [0.0, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40]

    print("\n--- Sweep: mom_blend (risk_managed=False) ---")
    _sweep_blends(blends, risk_managed=False)

    print("\n--- Sweep: mom_blend (risk_managed=True, dd_floor=0.40, dd_denom=0.20) ---")
    results_rm = _sweep_blends(blends, risk_managed=True)

    # --- Best config: yearly breakdown ---
    best_alpha = max(results_rm, key=lambda k: results_rm[k]["metrics"]["sharpe"])
    print(f"\n{'=' * 80}")
    print(f"BEST CONFIG: mom_blend={best_alpha:.2f} + risk_managed=True")
    print(f"{'=' * 80}")
    best_rets = results_rm[best_alpha]["rets"]
    best_m = results_rm[best_alpha]["metrics"]
    print(f"CAGR: {best_m['cagr']*100:.1f}% Vol: {best_m['vol']*100:.1f}% "
          f"Sharpe: {best_m['sharpe']:.2f} MaxDD: {best_m['max_dd']*100:.1f}% "
          f"Calmar: {best_m['calmar']:.2f}")
    print("\n--- Yearly returns ---")
    yr = yearly_returns(best_rets)
    for year, ret in yr.items():
        print(f" {year}: {ret*100:>+7.1f}%")

    # --- IS/OOS validation ---
    # NOTE(review): best_alpha was picked on the full sweep window, which
    # already contains the 2023+ period, so the "OOS" figures below are not
    # free of selection bias.
    print(f"\n{'=' * 80}")
    print("IS/OOS VALIDATION")
    print(f"{'=' * 80}")
    strat_best = MomentumBlendEnsembleStrategy(
        top_n=10, mom_blend=best_alpha, risk_managed=True
    )
    is_rets = backtest_strategy(strat_best, start="2016-04-01", end="2022-12-31")
    oos_rets = backtest_strategy(strat_best, start="2023-01-01", end="2026-05-13")
    is_m = compute_metrics(is_rets)
    oos_m = compute_metrics(oos_rets)
    print(f" IS (2016-2022): CAGR {is_m['cagr']*100:.1f}% Sharpe {is_m['sharpe']:.2f} MaxDD {is_m['max_dd']*100:.1f}%")
    print(f" OOS (2023-2026): CAGR {oos_m['cagr']*100:.1f}% Sharpe {oos_m['sharpe']:.2f} MaxDD {oos_m['max_dd']*100:.1f}%")
    # A zero in-sample metric prints "nan" instead of raising or printing inf.
    print(f" OOS/IS CAGR ratio: {_safe_ratio(oos_m['cagr'], is_m['cagr']):.2f}")
    print(f" OOS/IS Sharpe ratio: {_safe_ratio(oos_m['sharpe'], is_m['sharpe']):.2f}")

    # --- Bootstrap confidence intervals ---
    print(f"\n{'=' * 80}")
    print("BLOCK BOOTSTRAP (5000 resamples, block=21 days)")
    print(f"{'=' * 80}")
    from research.trend_rider_p0 import block_bootstrap, bootstrap_summary
    boot = block_bootstrap(best_rets, n_boot=5000, block_len=21)
    summary = bootstrap_summary(boot)
    # The original selection listed "p0500" twice, which looks like a typo
    # for a 5/25/mean/50/75/95 ladder.  NOTE(review): "p0050" is assumed to
    # follow the naming of the other columns -- confirm against
    # bootstrap_summary; columns that do not exist are skipped.
    wanted = ["p0050", "p0250", "mean", "p0500", "p0750", "p0950"]
    shown = [col for col in wanted if col in summary.columns]
    print(summary[shown].to_string())
    print(f"\n P(Sharpe < 1.0): {(boot['sharpe'] < 1.0).mean()*100:.1f}%")
    print(f" P(Sharpe < 1.5): {(boot['sharpe'] < 1.5).mean()*100:.1f}%")
    print(f" P(MaxDD > 30%): {(boot['max_drawdown'].abs() > 0.30).mean()*100:.1f}%")
    print(f" P(MaxDD > 25%): {(boot['max_drawdown'].abs() > 0.25).mean()*100:.1f}%")

    # --- Compare with baseline (no momentum blend) ---
    print(f"\n{'=' * 80}")
    # This line needs the f-prefix; without it the placeholder is printed
    # literally instead of the chosen blend weight.
    print(f"COMPARISON: Baseline (α=0) vs Best (α={best_alpha:.2f})")
    print(f"{'=' * 80}")
    base_m = results_rm[0.0]["metrics"]
    print(f" Baseline: CAGR {base_m['cagr']*100:.1f}% Sharpe {base_m['sharpe']:.2f} MaxDD {base_m['max_dd']*100:.1f}%")
    print(f" Best: CAGR {best_m['cagr']*100:.1f}% Sharpe {best_m['sharpe']:.2f} MaxDD {best_m['max_dd']*100:.1f}%")
    print(f" Δ Sharpe: {best_m['sharpe'] - base_m['sharpe']:+.2f}")
    print(f" Δ CAGR: {(best_m['cagr'] - base_m['cagr'])*100:+.1f}pp")


if __name__ == "__main__":
    main()