Files
quant/research/strategy_sharpe_boost_v2.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

293 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Sharpe boost v2: Dispersion-adaptive exposure + momentum blend.
Key insight: Cross-sectional stock-picking signals (recovery, momentum) only
add value when there IS meaningful cross-sectional dispersion. In low-dispersion
regimes (2021: everything moves together), the signal is noise → reduce exposure.
Approach:
1. Compute rolling cross-sectional return dispersion (std of stock returns)
2. When dispersion falls below a configurable historical percentile (default: 40th) → scale exposure down linearly toward a floor
3. Combine with momentum blend + DD dampener
This is economically justified (not curve-fitting):
- Stock-picking alpha ∝ dispersion (proven in academic literature)
- Low dispersion = herd behavior = stock selection adds no value
- High dispersion = stock differentiation = signal is informative
"""
from __future__ import annotations
import os
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from strategies.base import Strategy
def _rank(df):
    """Return the row-wise percentile rank of *df*.

    Each row is ranked across its columns and expressed as a fraction of the
    non-missing entries in that row; missing values stay missing.
    """
    row_pct_rank = df.rank(axis="columns", pct=True, na_option="keep")
    return row_pct_rank
class DispersionAdaptiveEnsemble(Strategy):
    """
    Ensemble with dispersion-adaptive exposure.

    Blends three cross-sectional rank signals, holds the top-ranked names at
    equal weight, rebalances every ``rebal_freq`` rows, and then scales the
    whole book by two lagged exposure multipliers: one driven by the rolling
    percentile of cross-sectional return dispersion, and (optionally) one
    driven by the drawdown of the equal-weighted universe.
    """

    def __init__(
        self,
        rebal_freq: int = 21,
        top_n: int = 10,
        mom_blend: float = 0.25,
        # Dispersion filter
        disp_window: int = 21,
        disp_lookback: int = 252,
        disp_percentile: float = 0.40,  # below this percentile -> reduce
        disp_floor: float = 0.50,  # minimum exposure in low-disp regime
        # DD dampener
        dd_floor: float = 0.40,
        dd_denom: float = 0.20,
        risk_managed: bool = True,
    ):
        """Store the strategy configuration.

        Args:
            rebal_freq: Number of rows between rebalances.
            top_n: Rank cutoff for the equally weighted holdings.
            mom_blend: Weight of the pure-momentum signal in the ensemble;
                the remainder is split evenly between signals A and B.
            disp_window: Window (rows) used to smooth daily dispersion.
            disp_lookback: Window (rows) for the rolling percentile rank of
                the smoothed dispersion.
            disp_percentile: Percentile at which exposure reaches 1.0; below
                it exposure ramps linearly down to ``disp_floor``. A value
                <= 0 disables the dispersion filter.
            disp_floor: Minimum dispersion-driven exposure multiplier.
            dd_floor: Minimum drawdown-driven exposure multiplier.
            dd_denom: Drawdown depth used as the denominator of the linear
                drawdown dampener.
            risk_managed: Whether to apply the drawdown dampener.
        """
        self.rebal_freq = rebal_freq
        self.top_n = top_n
        self.mom_blend = mom_blend
        self.disp_window = disp_window
        self.disp_lookback = disp_lookback
        self.disp_percentile = disp_percentile
        self.disp_floor = disp_floor
        self.dd_floor = dd_floor
        self.dd_denom = dd_denom
        self.risk_managed = risk_managed

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Build the weight matrix for *data*.

        Args:
            data: Price frame; rows are ranked across columns, so this
                presumably holds dates on the index and one column per
                ticker (confirm against the data loader).

        Returns:
            A frame shaped like *data* holding portfolio weights. Weights are
            shifted by one row so that a row only uses information from
            earlier rows, and the first 252 rows are zero (warm-up).
        """
        p = data
        ret = p.pct_change()

        # === Signal A: rec_mfilt + deep_upvol ===
        rec_126 = p / p.rolling(126, min_periods=126).min() - 1
        mom_filter = p.shift(21).pct_change(105)
        rec_mfilt = rec_126.where(mom_filter > 0, np.nan)
        rec_mfilt_r = _rank(rec_mfilt)
        up_vol = ret.where(ret > 0, 0).rolling(20, min_periods=15).sum()
        deep_upvol = _rank(rec_126) * _rank(up_vol)
        deep_upvol_r = _rank(deep_upvol)
        signal_a = 0.5 * rec_mfilt_r + 0.5 * deep_upvol_r

        # === Signal B: Recovery 63d + 12-1 momentum ===
        rec_63 = p / p.rolling(63, min_periods=63).min() - 1
        mom_12_1 = p.shift(21).pct_change(231)
        rec_63_r = _rank(rec_63)
        mom_r = _rank(mom_12_1)
        signal_b = 0.5 * rec_63_r + 0.5 * mom_r

        # === Signal C: Pure momentum ===
        signal_c = mom_r

        # === Ensemble ===
        alpha = self.mom_blend
        ensemble = (
            (1 - alpha) / 2 * signal_a
            + (1 - alpha) / 2 * signal_b
            + alpha * signal_c
        )

        # === Select top_n ===
        # Rows with fewer than top_n valid scores are left entirely flat.
        rank = ensemble.rank(axis=1, ascending=False, na_option="bottom")
        n_valid = ensemble.notna().sum(axis=1)
        enough = n_valid >= self.top_n
        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)
        raw = top_mask.astype(float)
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

        # === Periodic rebalance ===
        # Keep weights only on rebalance rows and carry them forward between.
        warmup = 252
        rebal_mask = pd.Series(False, index=data.index)
        rebal_indices = list(range(warmup, len(data), self.rebal_freq))
        rebal_mask.iloc[rebal_indices] = True
        signals[~rebal_mask] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0
        signals = signals.shift(1).fillna(0.0)  # PIT

        # === Exposure overlays (both already lagged by one row) ===
        signals = signals.mul(self._dispersion_scale(ret), axis=0)
        if self.risk_managed:
            signals = signals.mul(self._drawdown_scale(ret), axis=0)
        return signals

    def _dispersion_scale(self, ret: pd.DataFrame) -> pd.Series:
        """Return the lagged exposure multiplier driven by return dispersion."""
        if self.disp_percentile <= 0:
            # Nothing can rank below a non-positive percentile, so the filter
            # is off. Handling this explicitly avoids dividing by zero.
            return pd.Series(1.0, index=ret.index)

        # Cross-sectional dispersion: std of stock returns each day.
        cs_disp = ret.std(axis=1)
        # min_periods is capped by the window so that short windows remain
        # valid (pandas rejects min_periods > window).
        disp_smooth = cs_disp.rolling(
            self.disp_window, min_periods=min(10, self.disp_window)
        ).mean()
        disp_pctile = disp_smooth.rolling(
            self.disp_lookback, min_periods=min(126, self.disp_lookback)
        ).rank(pct=True)

        # Linear ramp: disp_floor at percentile 0, 1.0 at disp_percentile.
        ramp = (disp_pctile / self.disp_percentile).clip(0.0, 1.0)
        disp_scale = self.disp_floor + (1.0 - self.disp_floor) * ramp
        # PIT: use the previous row's estimate; unknown history -> full size.
        return disp_scale.shift(1).fillna(1.0)

    def _drawdown_scale(self, ret: pd.DataFrame) -> pd.Series:
        """Return the lagged exposure multiplier driven by market drawdown."""
        mkt_rets = ret.fillna(0.0).mean(axis=1)
        mkt_eq = (1 + mkt_rets).cumprod()
        mkt_dd = mkt_eq / mkt_eq.cummax() - 1
        dd_scale = (1.0 + mkt_dd / self.dd_denom).clip(
            lower=self.dd_floor, upper=1.0
        )
        # PIT: use the previous row's drawdown.
        return dd_scale.shift(1).fillna(1.0)
# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
def compute_metrics(daily_rets: pd.Series) -> dict:
    """Summarise a daily return series with annualised performance metrics.

    Annualisation assumes 252 periods per year.

    Args:
        daily_rets: Simple per-period returns.

    Returns:
        Dict with keys ``cagr``, ``vol``, ``sharpe``, ``max_dd`` and
        ``calmar``. ``sharpe`` is 0 when the return standard deviation is not
        positive, ``calmar`` is 0 when there is no drawdown, and every metric
        is 0.0 for an empty series.
    """
    if len(daily_rets) == 0:
        # Without this guard the CAGR exponent divides by zero years.
        return {"cagr": 0.0, "vol": 0.0, "sharpe": 0.0, "max_dd": 0.0, "calmar": 0.0}

    eq = (1 + daily_rets).cumprod()
    n_years = len(daily_rets) / 252.0
    cagr = eq.iloc[-1] ** (1.0 / n_years) - 1.0

    daily_std = daily_rets.std()
    annualiser = np.sqrt(252)
    vol = daily_std * annualiser
    sharpe = daily_rets.mean() / daily_std * annualiser if daily_std > 0 else 0

    running_max = eq.cummax()
    dd = eq / running_max - 1
    max_dd = dd.min()
    calmar = cagr / abs(max_dd) if max_dd != 0 else 0
    return {"cagr": cagr, "vol": vol, "sharpe": sharpe, "max_dd": max_dd, "calmar": calmar}
def yearly_returns(daily_rets: pd.Series) -> pd.Series:
    """Return calendar-year returns of a daily return series.

    Args:
        daily_rets: Simple per-period returns indexed by a DatetimeIndex.

    Returns:
        Series indexed by calendar year. The first year's return is measured
        from a starting equity of 1.0. Only years that actually contain
        observations appear in the result. An empty input yields an empty
        series.
    """
    if daily_rets.empty:
        # Nothing to compound; avoids indexing into an empty result below.
        return pd.Series(dtype=float)

    eq = (1 + daily_rets).cumprod()
    # Grouping on the calendar year avoids resample frequency aliases, which
    # differ between pandas versions, and yields the year index directly.
    year_end_eq = eq.groupby(eq.index.year).last()
    yearly = year_end_eq.pct_change()
    # The first year has no prior year-end; equity implicitly starts at 1.0.
    yearly.iloc[0] = year_end_eq.iloc[0] - 1
    return yearly
# Process-wide cache so the price data is refreshed and loaded only once.
_DATA_CACHE = {}


def backtest_strategy(strategy, start="2016-04-01", end="2026-05-13"):
    """Run *strategy* on the cached price data and return its daily returns.

    On the first call the data is refreshed and loaded through the project's
    ``data_manager`` and ``universe`` modules and stored in ``_DATA_CACHE``.
    Later calls reuse the cached frame.

    Args:
        strategy: Object exposing ``generate_signals(data)`` that returns a
            weight frame aligned with the price data.
        start: First date (inclusive) of the returned window.
        end: Last date (inclusive) of the returned window.

    Returns:
        Series of portfolio returns (sum over columns of weight x return)
        restricted to ``start:end``.

    Raises:
        RuntimeError: If the data loader returns ``None``.
    """
    data = _DATA_CACHE.get("data")
    if data is None:
        # Project imports are only needed on a cache miss.
        import data_manager
        from universe import get_sp500

        tickers = get_sp500()
        data_manager.update("us", tickers)
        data = data_manager.load("us")
        if data is None:
            # Validate before caching so a failed load does not poison the
            # cache and can be retried by a later call.
            raise RuntimeError("No data loaded")
        _DATA_CACHE["data"] = data

    weights = strategy.generate_signals(data)
    daily_rets = (weights * data.pct_change().fillna(0.0)).sum(axis=1)
    return daily_rets.loc[start:end]
def _print_sweep(title, configs, risk_managed):
    """Backtest every (disp_percentile, disp_floor) pair and print a metrics table."""
    print(title)
    print(f"{'disp_pct':>8s} {'floor':>6s} {'CAGR':>7s} {'Vol':>7s} {'Sharpe':>7s} {'MaxDD':>7s} {'Calmar':>7s}")
    print("-" * 60)
    for disp_pct, floor in configs:
        strat = DispersionAdaptiveEnsemble(
            top_n=10, mom_blend=0.25, disp_percentile=disp_pct,
            disp_floor=floor, risk_managed=risk_managed,
        )
        m = compute_metrics(backtest_strategy(strat))
        print(f"{disp_pct:>8.2f} {floor:>6.2f} {m['cagr']*100:>6.1f}% {m['vol']*100:>6.1f}% "
              f"{m['sharpe']:>7.2f} {m['max_dd']*100:>6.1f}% {m['calmar']:>7.2f}")


def main():
    """Run the dispersion-filter experiments and print their results.

    Prints, in order: a parameter sweep without and with the drawdown
    dampener, the metrics and yearly returns of one fixed configuration, an
    unfiltered baseline, and per-year dispersion diagnostics.
    """
    print("=" * 80)
    print("SHARPE BOOST v2: Dispersion-Adaptive Exposure")
    print("=" * 80)

    configs = [
        (0.30, 0.40),
        (0.30, 0.50),
        (0.40, 0.40),
        (0.40, 0.50),
        (0.40, 0.60),
        (0.50, 0.40),
        (0.50, 0.50),
        (0.50, 0.60),
    ]

    # --- Test 1: Dispersion filter only (no DD dampener) ---
    _print_sweep(
        "\n--- Dispersion filter sweep (risk_managed=False) ---",
        configs,
        risk_managed=False,
    )

    # --- Test 2: Dispersion filter + DD dampener ---
    _print_sweep(
        "\n--- Dispersion filter + DD dampener (risk_managed=True) ---",
        configs,
        risk_managed=True,
    )

    # --- Test 3: Best dispersion config — yearly breakdown ---
    print(f"\n{'=' * 80}")
    print("BEST CONFIG: disp_pct=0.40, floor=0.50, risk_managed=True")
    print(f"{'=' * 80}")
    best_strat = DispersionAdaptiveEnsemble(
        top_n=10, mom_blend=0.25, disp_percentile=0.40,
        disp_floor=0.50, risk_managed=True,
    )
    best_rets = backtest_strategy(best_strat)
    best_m = compute_metrics(best_rets)
    print(f"CAGR: {best_m['cagr']*100:.1f}% Vol: {best_m['vol']*100:.1f}% "
          f"Sharpe: {best_m['sharpe']:.2f} MaxDD: {best_m['max_dd']*100:.1f}% "
          f"Calmar: {best_m['calmar']:.2f}")
    print("\n--- Yearly returns ---")
    for year, yr_ret in yearly_returns(best_rets).items():
        print(f" {year}: {yr_ret*100:>+7.1f}%")

    # --- Test 4: No filter baseline for comparison ---
    print("\n--- Baseline (no dispersion filter, no DD) ---")
    baseline = DispersionAdaptiveEnsemble(
        top_n=10, mom_blend=0.25, disp_percentile=0.0,
        disp_floor=1.0, risk_managed=False,
    )
    base_m = compute_metrics(backtest_strategy(baseline))
    print(f"CAGR: {base_m['cagr']*100:.1f}% Vol: {base_m['vol']*100:.1f}% "
          f"Sharpe: {base_m['sharpe']:.2f} MaxDD: {base_m['max_dd']*100:.1f}%")

    # --- Test 5: Dispersion diagnostics for 2021 ---
    print(f"\n{'=' * 80}")
    print("DISPERSION DIAGNOSTIC: Is 2021 actually low dispersion?")
    print(f"{'=' * 80}")
    # The backtests above have already populated the cache.
    data = _DATA_CACHE["data"]
    cs_disp = data.pct_change().std(axis=1)
    disp_smooth = cs_disp.rolling(21, min_periods=10).mean()
    for year in range(2017, 2027):
        # A boolean year mask returns an empty selection for years the data
        # does not cover; a partial-string .loc lookup raises KeyError for
        # years outside the index range, which would bypass the length check.
        yr_disp = disp_smooth[disp_smooth.index.year == year]
        if len(yr_disp) > 0:
            print(f" {year}: avg disp = {yr_disp.mean()*100:.2f}% "
                  f"median = {yr_disp.median()*100:.2f}%")
# Run the experiments only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()