Files
quant/research/strategy_sharpe_boost_v3.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

277 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Sharpe boost v3: Concentration + rebalance frequency + trailing alpha gate + asymmetric vol scaling.
Previous findings:
- Momentum blend: Sharpe 1.34 → 1.37 (marginal)
- Dispersion filter: Sharpe 1.34 → 1.31 (worse)
- 2021 problem is NOT about dispersion or vol — it's narrow mega-cap rally
New ideas to test:
1. Higher concentration (top_n=8) → more alpha per stock if signal is good
2. Shorter rebalance (14 days) → capture alpha faster, reduce stale positions
3. Trailing alpha gate: if strategy's 63-day return < market's 63-day return
by >20pp, reduce exposure (signal currently uninformative)
4. Asymmetric vol scaling: only scale down when vol is high AND returns negative
(high vol + positive = good! don't cut that)
"""
from __future__ import annotations
import os
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from strategies.base import Strategy
def _rank(df):
return df.rank(axis=1, pct=True, na_option="keep")
def compute_metrics(daily_rets: pd.Series) -> dict:
    """Summarise a daily return series as annualised performance metrics.

    Args:
        daily_rets: Simple daily returns (they are compounded as ``1 + r``).
            252 rows are treated as one year.

    Returns:
        Dict with keys ``cagr``, ``vol``, ``sharpe``, ``max_dd`` and
        ``calmar``. ``sharpe`` falls back to 0 when the daily standard
        deviation is not strictly positive, and ``calmar`` falls back to 0
        when there is no drawdown. Every value is NaN when ``daily_rets`` is
        empty, because none of the metrics is defined without observations.
    """
    if len(daily_rets) == 0:
        # Without this guard eq.iloc[-1] raises IndexError and n_years is 0.
        nan = float("nan")
        return {"cagr": nan, "vol": nan, "sharpe": nan, "max_dd": nan, "calmar": nan}

    eq = (1 + daily_rets).cumprod()
    n_years = len(daily_rets) / 252.0
    cagr = eq.iloc[-1] ** (1.0 / n_years) - 1.0

    # Compute the standard deviation once; it feeds both vol and Sharpe.
    daily_std = daily_rets.std()
    annualiser = np.sqrt(252)
    vol = daily_std * annualiser
    sharpe = daily_rets.mean() / daily_std * annualiser if daily_std > 0 else 0

    # Drawdown is measured against the running peak of the equity curve.
    max_dd = (eq / eq.cummax() - 1).min()
    calmar = cagr / abs(max_dd) if max_dd != 0 else 0
    return {"cagr": cagr, "vol": vol, "sharpe": sharpe, "max_dd": max_dd, "calmar": calmar}
def yearly_returns(daily_rets: pd.Series) -> pd.Series:
eq = (1 + daily_rets).cumprod()
yearly = eq.resample("YE").last().pct_change()
yearly.iloc[0] = eq.resample("YE").last().iloc[0] - 1
yearly.index = yearly.index.year
return yearly
class EnsembleV2(Strategy):
    """Parameterized ensemble for testing concentration / rebalance / alpha gate.

    Scores every column of a price panel with a blend of recovery and
    momentum ranks, holds the best ``top_n`` names at equal weight, refreshes
    the holdings every ``rebal_freq`` rows, and optionally scales exposure
    down with two overlays (trailing-alpha gate, asymmetric vol scaling).

    Args:
        top_n: Number of names to hold at each rebalance.
        rebal_freq: Number of rows between rebalances.
        mom_blend: Weight of the pure-momentum rank (signal C) in the
            ensemble; the remainder is split evenly between signals A and B.
            Values <= 0 leave signal C out and use a 50/50 A/B mix.
        alpha_gate: Enable the trailing-alpha gate.
        alpha_gate_threshold: The gate fires when the trailing sum of
            portfolio returns minus the trailing sum of the cross-sectional
            mean return is below this value.
        alpha_gate_window: Rolling window (rows) for those trailing sums.
        alpha_gate_floor: Exposure multiplier applied while the gate fires.
        asym_vol: Enable asymmetric vol scaling.
        asym_vol_window: Rolling window (rows) for short-term vol and return.
        asym_vol_floor: Exposure multiplier applied while the vol rule fires.
    """

    def __init__(self, top_n=10, rebal_freq=21, mom_blend=0.0,
                 alpha_gate=False, alpha_gate_threshold=-0.20,
                 alpha_gate_window=63, alpha_gate_floor=0.50,
                 asym_vol=False, asym_vol_window=20, asym_vol_floor=0.50):
        # NOTE(review): Strategy.__init__ is not invoked here; confirm the base
        # class needs no initialisation.
        self.top_n = top_n
        self.rebal_freq = rebal_freq
        self.mom_blend = mom_blend
        self.alpha_gate = alpha_gate
        self.alpha_gate_threshold = alpha_gate_threshold
        self.alpha_gate_window = alpha_gate_window
        self.alpha_gate_floor = alpha_gate_floor
        self.asym_vol = asym_vol
        self.asym_vol_window = asym_vol_window
        self.asym_vol_floor = asym_vol_floor

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Turn a price panel into a frame of portfolio weights.

        Args:
            data: Prices, one column per name and one row per period.

        Returns:
            Frame shaped like ``data``. Selected names share weight equally;
            weights are lagged by one row so a row only uses information
            from earlier rows, and the first 252 rows carry no position.
            With an overlay enabled a row may sum to less than 1.
        """
        scores = self._ensemble_scores(data)
        targets = self._select_top_n(scores)
        signals = self._hold_between_rebalances(targets)

        if self.alpha_gate or self.asym_vol:
            # Both overlays need the same per-name returns; compute them once.
            daily_rets = data.pct_change().fillna(0.0)
            if self.alpha_gate:
                signals = self._apply_alpha_gate(signals, daily_rets)
            if self.asym_vol:
                # Runs after the gate, so it sees the already-gated weights.
                signals = self._apply_asym_vol(signals, daily_rets)
        return signals

    def _ensemble_scores(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Blend the recovery / momentum rank signals into one score per cell."""
        ret = prices.pct_change()

        # === Signal A: rec_mfilt + deep_upvol ===
        rec_126 = prices / prices.rolling(126, min_periods=126).min() - 1
        mom_filter = prices.shift(21).pct_change(105)
        rec_mfilt_r = _rank(rec_126.where(mom_filter > 0, np.nan))
        # Sum of the positive daily returns over the last 20 rows.
        up_vol = ret.where(ret > 0, 0).rolling(20, min_periods=15).sum()
        deep_upvol_r = _rank(_rank(rec_126) * _rank(up_vol))
        signal_a = 0.5 * rec_mfilt_r + 0.5 * deep_upvol_r

        # === Signal B: Recovery 63d + 12-1 momentum ===
        rec_63 = prices / prices.rolling(63, min_periods=63).min() - 1
        mom_r = _rank(prices.shift(21).pct_change(231))
        signal_b = 0.5 * _rank(rec_63) + 0.5 * mom_r

        # === Signal C: Pure momentum ===
        signal_c = mom_r

        # === Ensemble ===
        blend = self.mom_blend
        if blend > 0:
            core_weight = (1 - blend) / 2
            return core_weight * signal_a + core_weight * signal_b + blend * signal_c
        return 0.5 * signal_a + 0.5 * signal_b

    def _select_top_n(self, scores: pd.DataFrame) -> pd.DataFrame:
        """Equal-weight the ``top_n`` highest scores in each row."""
        rank = scores.rank(axis=1, ascending=False, na_option="bottom")
        # Rows with fewer than top_n scored names get no position at all.
        enough = scores.notna().sum(axis=1) >= self.top_n
        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)
        raw = top_mask.astype(float)
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        return raw.div(row_sums, axis=0).fillna(0.0)

    def _hold_between_rebalances(self, targets: pd.DataFrame) -> pd.DataFrame:
        """Keep each rebalance row's weights until the next rebalance, lagged one row."""
        warmup = 252
        rebal_mask = pd.Series(False, index=targets.index)
        rebal_mask.iloc[list(range(warmup, len(targets), self.rebal_freq))] = True

        signals = targets
        # Blank every non-rebalance row so ffill carries the last rebalance forward.
        signals[~rebal_mask] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0
        return signals.shift(1).fillna(0.0)  # PIT

    @staticmethod
    def _lagged_scale(active: pd.Series, floor: float) -> pd.Series:
        """Return a multiplier that is ``floor`` the row after ``active`` is True, else 1."""
        scale = pd.Series(1.0, index=active.index)
        scale[active] = floor
        return scale.shift(1).fillna(1.0)  # PIT

    def _apply_alpha_gate(self, signals: pd.DataFrame, daily_rets: pd.DataFrame) -> pd.DataFrame:
        """Scale weights down while trailing excess return is below the threshold."""
        window = self.alpha_gate_window
        # rolling() rejects min_periods > window, so clamp it for short windows.
        min_obs = min(21, window)
        port_rets = (signals * daily_rets).sum(axis=1)
        mkt_rets = daily_rets.mean(axis=1)

        # Trailing excess return over market
        trail_port = port_rets.rolling(window, min_periods=min_obs).sum()
        trail_mkt = mkt_rets.rolling(window, min_periods=min_obs).sum()
        excess = trail_port - trail_mkt

        # When deeply underperforming → scale down
        gate_active = excess < self.alpha_gate_threshold
        return signals.mul(self._lagged_scale(gate_active, self.alpha_gate_floor), axis=0)

    def _apply_asym_vol(self, signals: pd.DataFrame, daily_rets: pd.DataFrame) -> pd.DataFrame:
        """Scale weights down only when vol is elevated AND recent returns are negative."""
        window = self.asym_vol_window
        # rolling() rejects min_periods > window, so clamp it for short windows.
        min_obs = min(10, window)
        port_rets = (signals * daily_rets).sum(axis=1)
        short_vol = port_rets.rolling(window, min_periods=min_obs).std() * np.sqrt(252)
        vol_median = short_vol.rolling(252, min_periods=126).median()

        # Only scale down when vol is high AND recent returns are negative
        recent_ret = port_rets.rolling(window, min_periods=min_obs).sum()
        high_vol_neg_ret = (short_vol > vol_median * 1.5) & (recent_ret < 0)
        return signals.mul(self._lagged_scale(high_vol_neg_ret, self.asym_vol_floor), axis=0)
# Module-level cache: the price panel is stored under the "data" key on first use.
_DATA_CACHE = {}


def backtest_strategy(strategy, start="2016-04-01", end="2026-05-13"):
    """Run ``strategy`` on the cached "us" price panel and return daily returns.

    On the first call this invokes ``data_manager.update("us", ...)`` with the
    result of ``universe.get_sp500()`` and caches ``data_manager.load("us")``;
    later calls reuse the cached panel.

    Args:
        strategy: Object exposing ``generate_signals(data)`` that returns
            weights aligned with the price panel.
        start: First label of the returned slice (inclusive ``.loc`` slicing).
        end: Last label of the returned slice (inclusive ``.loc`` slicing).

    Returns:
        Series of daily portfolio returns: the row-wise sum of weights times
        per-name returns, restricted to ``start``..``end``.
    """
    import data_manager

    try:
        prices = _DATA_CACHE["data"]
    except KeyError:
        from universe import get_sp500

        data_manager.update("us", get_sp500())
        prices = _DATA_CACHE["data"] = data_manager.load("us")

    weights = strategy.generate_signals(prices)
    asset_rets = prices.pct_change().fillna(0.0)
    portfolio_rets = (weights * asset_rets).sum(axis=1)
    return portfolio_rets.loc[start:end]
def fmt_row(label, m):
    """Format one results-table row.

    Args:
        label: Row label, left-aligned in a 40-character column.
        m: Metrics dict with keys ``cagr``, ``vol``, ``sharpe``, ``max_dd``
            and ``calmar``; ``cagr``, ``vol`` and ``max_dd`` are fractions
            and are printed as percentages.

    Returns:
        The formatted row as a single string.
    """
    cells = [
        f"{label:<40s}",
        f"{m['cagr'] * 100:>6.1f}%",
        f"{m['vol'] * 100:>6.1f}%",
        f"{m['sharpe']:>6.2f}",
        f"{m['max_dd'] * 100:>6.1f}%",
        f"{m['calmar']:>6.2f}",
    ]
    return " ".join(cells)
def _print_section(title, header):
    """Print a sweep title followed by the table header and a dashed rule."""
    print(title)
    print(header)
    print("-" * 80)


def _run_config(label, **params):
    """Backtest ``EnsembleV2(**params)``, print its metrics row, return ``(metrics, rets)``."""
    rets = backtest_strategy(EnsembleV2(**params))
    metrics = compute_metrics(rets)
    print(fmt_row(label, metrics))
    return metrics, rets


def main():
    """Run every parameter sweep, print the result tables, and break down the best combo by year.

    The best combo is the one with the highest Sharpe among the final
    ``combos`` list, including when every Sharpe is zero or negative. If no
    combo yields a comparable Sharpe (e.g. all NaN), the yearly breakdown is
    skipped instead of crashing.
    """
    rule = "=" * 80
    print(rule)
    print("SHARPE BOOST v3: Concentration / Rebalance / Alpha Gate / Asym Vol")
    print(rule)
    header = f"{'Config':<40s} {'CAGR':>7s} {'Vol':>7s} {'Sharpe':>6s} {'MaxDD':>7s} {'Calmar':>6s}"

    # --- Sweep 1: Concentration (top_n) ---
    _print_section("\n--- Concentration sweep (rebal=21, no risk mgmt) ---", header)
    for n in [6, 8, 10, 12, 15]:
        _run_config(f"top_n={n}", top_n=n, rebal_freq=21)

    # --- Sweep 2: Rebalance frequency ---
    _print_section("\n--- Rebalance frequency sweep (top_n=10) ---", header)
    for freq in [5, 10, 14, 21, 42]:
        _run_config(f"rebal={freq}d", top_n=10, rebal_freq=freq)

    # --- Sweep 3: Momentum blend + concentration ---
    _print_section("\n--- Momentum blend + concentration (rebal=14) ---", header)
    for n in [8, 10]:
        for blend in [0.0, 0.20, 0.30]:
            _run_config(f"top_n={n}, mom={blend:.0%}, rebal=14",
                        top_n=n, rebal_freq=14, mom_blend=blend)

    # --- Sweep 4: Alpha gate ---
    _print_section("\n--- Alpha gate (top_n=10, rebal=21) ---", header)
    for thresh in [-0.10, -0.15, -0.20]:
        for floor in [0.30, 0.50]:
            _run_config(f"alpha_gate thresh={thresh}, floor={floor}",
                        top_n=10, rebal_freq=21, alpha_gate=True,
                        alpha_gate_threshold=thresh, alpha_gate_floor=floor)

    # --- Sweep 5: Asymmetric vol ---
    _print_section("\n--- Asymmetric vol (top_n=10, rebal=21) ---", header)
    for floor in [0.30, 0.50, 0.70]:
        _run_config(f"asym_vol floor={floor}",
                    top_n=10, rebal_freq=21, asym_vol=True, asym_vol_floor=floor)

    # --- Best combo: everything together ---
    print("\n" + rule)
    print("COMBO: Best of each mechanism together")
    print(rule)
    print(header)
    print("-" * 80)
    combos = [
        ("top8 + rebal14 + mom20%", dict(top_n=8, rebal_freq=14, mom_blend=0.20)),
        ("top8 + rebal14 + mom20% + alpha_gate", dict(top_n=8, rebal_freq=14, mom_blend=0.20, alpha_gate=True, alpha_gate_threshold=-0.15, alpha_gate_floor=0.50)),
        ("top8 + rebal14 + mom20% + asym_vol", dict(top_n=8, rebal_freq=14, mom_blend=0.20, asym_vol=True, asym_vol_floor=0.50)),
        ("top8 + rebal14 + mom20% + both", dict(top_n=8, rebal_freq=14, mom_blend=0.20, alpha_gate=True, alpha_gate_threshold=-0.15, alpha_gate_floor=0.50, asym_vol=True, asym_vol_floor=0.50)),
        ("top10 + rebal14 + mom30%", dict(top_n=10, rebal_freq=14, mom_blend=0.30)),
        ("top10 + rebal14 + mom30% + alpha_gate", dict(top_n=10, rebal_freq=14, mom_blend=0.30, alpha_gate=True, alpha_gate_threshold=-0.15, alpha_gate_floor=0.50)),
    ]

    # Start from -inf so a combo still wins when every Sharpe is <= 0;
    # starting from 0 left best_rets as None in that case.
    best_sharpe = float("-inf")
    best_label = ""
    best_rets = None
    for label, kwargs in combos:
        m, rets = _run_config(label, **kwargs)
        if m["sharpe"] > best_sharpe:
            best_sharpe = m["sharpe"]
            best_label = label
            best_rets = rets

    # --- Yearly for best combo ---
    if best_rets is None:
        # Only reachable when no Sharpe compared greater than -inf (e.g. all NaN).
        print("\n--- Best combo: none (no combo produced a comparable Sharpe) ---")
        return
    print(f"\n--- Best combo: {best_label} (Sharpe={best_sharpe:.2f}) ---")
    for year, ret in yearly_returns(best_rets).items():
        print(f" {year}: {ret*100:>+7.1f}%")
# Script entry point: run the sweeps only when executed directly, not on import.
if __name__ == "__main__":
    main()