Files
quant/research/v7_literature_alpha.py
Gahow Wang 0d983edfc0 research: individual stock swing, new frameworks, literature alpha, DCA
Four research directions beyond V7+VT36:

1. single_stock_swing: 20 famous stocks (Mag 7 + others), per-stock
   optimized swing trading. High-vol growth stocks (AMD Sharpe 1.55,
   TSLA 1.54) work best, but overfitting risk is extreme — with
   universal params, only TSLA is viable. Not competitive with V7.

2. v7_literature_alpha: 9 academic directions (VIX overlay, Kelly
   sizing, multi-MA, cross-asset, momentum acceleration, VIX mean-
   reversion, vol-adaptive PT, combined, alternative MA windows).
   V3's regime engine already implicitly captures most literature
   signals. MA130 is marginally better than MA150 (+0.02 Sharpe,
   within noise).

3. new_frameworks_eval: volatility trading (SVXY risk-off) and
   calendar effects (turn-of-month). SVXY and V7 regime structurally
   conflict — SVXY crashes exactly when V7 goes risk-off.
   Turn-of-month has decent Sharpe (1.30) but only 28% annual.
   Nothing beats V7.

4. smart_dca_eval: fixed/VIX-scaled/MA-deviation/value-averaging/RSI
   DCA into SPY/QQQ/TQQQ/UPRO + V7 hybrids. Smart DCA barely beats
   fixed DCA. Any DCA hybrid dilutes V7's alpha. DCA only useful for
   new monthly contributions that can't lump-sum into V7.

Conclusion: V7+VT36 remains SOTA across all tested frameworks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-23 00:45:44 +08:00

402 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Literature-informed alpha research: can we beat V7+VT36?
Grounded in specific academic/industry research:
1. VIX regime overlay — Simon & Campasano (2014): VIX level as exogenous fear signal
2. Kelly-optimal sizing — Kelly (1956), Thorp (2006): return-aware position sizing
3. Multi-timeframe voting — Faber (2007): multiple MAs reduce false signals
4. Cross-asset confirmation — Asness et al. (2013): correlated asset agreement
5. Momentum acceleration — Moskowitz et al. (2012): 2nd derivative of trend
6. VIX mean-reversion entry — Whaley (2009): buy panic, sell complacency
7. Carry-enhanced risk-off — Koijen et al. (2018): hold yield during defensive periods
8. Regime-dependent PT — Optimal stopping theory: vol-drag-aware thresholds
"""
from __future__ import annotations
import sys
sys.path.insert(0, ".")
import numpy as np
import pandas as pd
import data_manager
import metrics
from strategies.base import Strategy
from strategies.permanent import TrendRiderV3
from strategies.trend_rider_v7 import TrendRiderV7
from main import backtest
# Length of the backtest window in years, counted back from the last bar
# of the loaded data (see the cutoff computed in main()).
YEARS = 10
# Starting equity, passed to backtest() as initial_capital.
CAPITAL = 100_000
# Passed to backtest() as transaction_cost; presumably a proportional cost
# per trade (0.001 = 10 bps) — TODO confirm against main.backtest.
TX_COST = 0.001
# Passed to backtest() as fixed_fee; presumably a flat fee per trade in
# account currency — TODO confirm against main.backtest.
FIXED_FEE = 2.0
class V7Enhanced(Strategy):
    """V7 with pluggable regime enhancer and sizing model.

    ``generate_signals`` runs four stages in order:

    1. Base weights come from a ``TrendRiderV3`` configured with signal
       ``SPY``, risk-on ``("TQQQ", "UPRO")`` and risk-off ``("GLD", "DBC")``.
    2. ``regime_enhancer(w, data)``, when supplied, may rewrite the weights.
    3. Every row is multiplied by a leverage scale clipped to
       ``[min_lev, max_lev]`` and lagged by one bar.
    4. A profit-take overlay parks the position in ``pt_park`` once the
       held risk-on asset has gained ``pt_threshold`` since entry, and
       re-enters after the gain falls back below the re-entry level.

    Args:
        regime_enhancer: Optional callable ``(w, data) -> w`` applied to the
            V3 weights before sizing.
        sizing_model: ``"kelly"`` selects rolling mean/variance sizing; any
            other value selects volatility targeting.
        pt_model: ``"vol_adaptive"`` scales the profit-take threshold by
            ``0.25 / realized_vol``; any other value uses a fixed threshold.
        target_vol: Annualised volatility target for volatility targeting.
        min_lev: Lower clip of the leverage scale.
        max_lev: Upper clip of the leverage scale.
        pt_threshold: Gain since entry that triggers profit-taking; a value
            ``<= 0`` disables the overlay.
        pt_band: Hysteresis; with the fixed model, re-entry happens once the
            gain drops below ``pt_threshold - pt_band``.
        pt_park: Column held while stopped out (ignored if absent).
        ma_long: Forwarded to ``TrendRiderV3``.
        **v3_kw: Extra keyword arguments forwarded to ``TrendRiderV3``.
    """

    def __init__(
        self,
        regime_enhancer=None,
        sizing_model="vol_target",
        pt_model="fixed",
        target_vol=0.36, min_lev=0.75, max_lev=1.0,
        pt_threshold=0.30, pt_band=0.10, pt_park="SHY",
        ma_long=150, **v3_kw,
    ):
        self.regime_enhancer = regime_enhancer
        self.sizing_model = sizing_model
        self.pt_model = pt_model
        self.target_vol = target_vol
        self.min_lev = min_lev
        self.max_lev = max_lev
        self.pt_threshold = pt_threshold
        self.pt_band = pt_band
        self.pt_park = pt_park
        self.v3 = TrendRiderV3(
            signal="SPY", risk_on=("TQQQ", "UPRO"), risk_off=("GLD", "DBC"),
            ma_long=ma_long, **v3_kw,
        )

    def generate_signals(self, data):
        """Return the weight frame after enhancement, sizing and profit-taking."""
        w = self.v3.generate_signals(data)
        # Make sure the parking asset has a column so the overlay can write to it.
        if self.pt_park and self.pt_park in data.columns and self.pt_park not in w.columns:
            w[self.pt_park] = 0.0

        # Regime enhancement: override V3's decision in specific conditions.
        if self.regime_enhancer:
            w = self.regime_enhancer(w, data)

        # Unscaled portfolio returns drive both sizing and the adaptive threshold.
        daily_ret = data.pct_change(fill_method=None).fillna(0.0)
        common = w.columns.intersection(daily_ret.columns)
        port_rets = (w[common] * daily_ret[common]).sum(axis=1)

        scale = self._leverage_scale(port_rets)
        w = w.mul(scale, axis=0)

        if self.pt_threshold <= 0:
            return w
        return self._apply_profit_take(w, data, scale, port_rets)

    def _leverage_scale(self, port_rets):
        """Return the per-bar leverage multiplier, lagged one bar."""
        window = port_rets.rolling(60, min_periods=21)
        if self.sizing_model == "kelly":
            # Kelly: scale = E[r] / Var[r], clipped.
            roll_mean = window.mean() * 252
            roll_var = window.var() * 252
            raw = (roll_mean / roll_var.clip(lower=0.01)).clip(-1, 2)
        else:
            realized_vol = window.std() * np.sqrt(252)
            raw = self.target_vol / realized_vol
        clipped = raw.clip(lower=self.min_lev, upper=self.max_lev)
        # shift(1): today's leverage may only use returns known yesterday.
        return clipped.shift(1).fillna(1.0)

    def _apply_profit_take(self, w, data, scale, port_rets):
        """Park the risk-on position once its gain since entry hits the threshold."""
        risk_on_set = set(self.v3.risk_on)
        held = w.idxmax(axis=1)
        max_w = w.max(axis=1)
        held[max_w < 1e-8] = ""
        park_col = self.pt_park if self.pt_park in w.columns else ""

        vol_adaptive = self.pt_model == "vol_adaptive"
        lagged_vol = None
        if vol_adaptive:
            # PT threshold inversely proportional to vol drag.
            # Vol drag ≈ leverage² × σ² / 2; for 3x: 9σ²/2
            # Optimal PT ≈ base / (1 + k * σ²)
            # shift(1) keeps the threshold point-in-time: the rolling window
            # ending at bar i contains bar i's own return, which is not known
            # when the decision for bar i is taken (prices below are likewise
            # read at i-1, and the leverage scale is lagged the same way).
            lagged_vol = (
                port_rets.rolling(60, min_periods=21).std().shift(1) * np.sqrt(252)
            ).to_numpy()

        def park(row):
            # Flatten the row, then hold the parking asset at the current scale.
            w.iloc[row] = 0.0
            if park_col:
                w.at[w.index[row], park_col] = scale.iloc[row]

        entry_price, current_sym, stopped = None, None, False
        fixed_reentry = self.pt_threshold - self.pt_band
        for i in range(len(w)):
            sym = held.iloc[i]
            if not sym or max_w.iloc[i] < 1e-8:
                # Flat: forget the previous position entirely.
                current_sym, entry_price, stopped = None, None, False
                continue
            if sym != current_sym:
                # New holding: record the prior close as the entry reference.
                current_sym = sym
                entry_price = (
                    float(data[sym].iloc[i - 1])
                    if i > 0 and sym in data.columns else None
                )
                stopped = False
                continue
            if sym not in risk_on_set:
                continue
            if entry_price is None or entry_price <= 0 or sym not in data.columns:
                continue

            # i >= 1 here: bar 0 always exits through one of the branches above.
            last_close = float(data[sym].iloc[i - 1])
            gain = last_close / entry_price - 1.0

            if vol_adaptive:
                rv = lagged_vol[i] if i < len(lagged_vol) else np.nan
                if np.isnan(rv):
                    rv = 0.25
                # Higher vol → lower threshold (take profits faster).
                take_at = self.pt_threshold * (0.25 / max(rv, 0.10))
                take_at = np.clip(take_at, 0.15, 0.50)
                reenter_below = take_at * (1 - self.pt_band / self.pt_threshold)
            else:
                take_at = self.pt_threshold
                reenter_below = fixed_reentry

            if stopped:
                if gain < reenter_below:
                    stopped = False
                else:
                    park(i)
            elif gain >= take_at:
                stopped = True
                park(i)
        return w
# =========================================================================
# Regime enhancers
# =========================================================================
def vix_overlay(vix_high=25, vix_low=15):
    """Force risk-off when VIX > threshold. Simon & Campasano (2014).

    Args:
        vix_high: Rows whose previous-bar VIX exceeds this level have their
            risk-on weight (TQQQ/UPRO) moved to the first available risk-off
            column (GLD, then DBC).
        vix_low: Accepted for interface compatibility; not used by the
            current logic.

    Returns:
        An ``enhancer(w, data)`` callable. It edits ``w`` in place and
        returns it; ``w`` is returned untouched when ``data`` has no
        ``^VIX`` column. Rows of ``w`` and ``data`` are matched by position.
    """
    def enhancer(w, data):
        if "^VIX" not in data.columns:
            return w
        risk_on_cols = [c for c in ("TQQQ", "UPRO") if c in w.columns]
        if not risk_on_cols:
            # Nothing can be risk-on, so there is nothing to veto.
            return w
        risk_off_cols = [c for c in ("GLD", "DBC") if c in w.columns]

        # Previous-bar VIX; the leading gap is filled with a neutral 20.
        vix = data["^VIX"].shift(1).fillna(20).to_numpy()[: len(w)]
        # skipna=False: a NaN weight keeps the row out of the veto.
        ron_w = w[risk_on_cols].sum(axis=1, skipna=False).to_numpy()
        veto = (ron_w > 0.01) & (vix > vix_high)
        if not veto.any():
            return w

        w.loc[veto, risk_on_cols] = 0.0
        if risk_off_cols:
            w.loc[veto, risk_off_cols[0]] = ron_w[veto]
        return w
    return enhancer
def multi_timeframe(windows=(50, 150, 200), min_agree=2):
    """Multi-MA voting. Faber (2007). Need majority of MAs bullish.

    Args:
        windows: Moving-average lengths; each casts one vote when SPY closes
            above that average.
        min_agree: Minimum number of bullish votes required to keep a
            risk-on position.

    Returns:
        An ``enhancer(w, data)`` callable. Rows with fewer than ``min_agree``
        votes have their risk-on weight (TQQQ/UPRO) moved to the first
        available risk-off column (GLD, then DBC). ``w`` is edited in place
        and returned; it is returned untouched when ``data`` has no ``SPY``
        column. Rows of ``w`` and ``data`` are matched by position.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns:
            return w
        risk_on_cols = [c for c in ("TQQQ", "UPRO") if c in w.columns]
        if not risk_on_cols:
            return w
        risk_off_cols = [c for c in ("GLD", "DBC") if c in w.columns]

        spy = data["SPY"]
        votes = pd.DataFrame(
            {f"ma{win}": (spy > spy.rolling(win).mean()).astype(int) for win in windows},
            index=data.index,
        )
        # PIT: shift 2 to match V3. The two leading rows have no history and
        # count as zero votes, the same way warm-up rows (MA undefined) do;
        # leaving them NaN would let them slip past the `< min_agree` test.
        total_votes = votes.sum(axis=1).shift(2, fill_value=0).to_numpy()[: len(w)]

        ron_w = w[risk_on_cols].sum(axis=1, skipna=False).to_numpy()
        veto = (ron_w > 0.01) & (total_votes < min_agree)
        if not veto.any():
            return w

        w.loc[veto, risk_on_cols] = 0.0
        if risk_off_cols:
            w.loc[veto, risk_off_cols[0]] = ron_w[veto]
        return w
    return enhancer
def cross_asset_confirm(ma_window=150):
    """Require both SPY and QQQ trends to agree. Asness et al. (2013).

    Args:
        ma_window: Length of the moving average each index must close above
            to count as bullish (default 150, the previous hard-coded value).

    Returns:
        An ``enhancer(w, data)`` callable. Rows where SPY and QQQ were not
        both above their moving average two bars earlier have their risk-on
        weight (TQQQ/UPRO) moved to the first available risk-off column
        (GLD, then DBC). ``w`` is edited in place and returned; it is
        returned untouched when ``data`` lacks ``SPY`` or ``QQQ``. Rows of
        ``w`` and ``data`` are matched by position.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns or "QQQ" not in data.columns:
            return w
        risk_on_cols = [c for c in ("TQQQ", "UPRO") if c in w.columns]
        if not risk_on_cols:
            return w
        risk_off_cols = [c for c in ("GLD", "DBC") if c in w.columns]

        def bullish(col):
            px = data[col]
            # fill_value keeps the series boolean; the two leading rows are
            # treated as "not bullish".
            return (px > px.rolling(ma_window).mean()).shift(2, fill_value=False)

        both_bull = (bullish("SPY") & bullish("QQQ")).astype(bool).to_numpy()[: len(w)]

        ron_w = w[risk_on_cols].sum(axis=1, skipna=False).to_numpy()
        veto = (ron_w > 0.01) & ~both_bull
        if not veto.any():
            return w

        w.loc[veto, risk_on_cols] = 0.0
        if risk_off_cols:
            w.loc[veto, risk_off_cols[0]] = ron_w[veto]
        return w
    return enhancer
def momentum_accel(accel_window=20, ma_window=150):
    """Only risk-on when trend is accelerating. Moskowitz et al. (2012).

    Concretely, the SPY moving average must be higher than it was
    ``accel_window`` bars earlier (a positive MA slope), evaluated two bars
    back.

    Args:
        accel_window: Look-back, in bars, over which the moving average must
            have risen.
        ma_window: Length of the SPY moving average (default 150, the
            previous hard-coded value).

    Returns:
        An ``enhancer(w, data)`` callable. Rows failing the slope test have
        their risk-on weight (TQQQ/UPRO) moved to the first available
        risk-off column (GLD, then DBC). ``w`` is edited in place and
        returned; it is returned untouched when ``data`` has no ``SPY``
        column. Rows of ``w`` and ``data`` are matched by position.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns:
            return w
        risk_on_cols = [c for c in ("TQQQ", "UPRO") if c in w.columns]
        if not risk_on_cols:
            return w
        risk_off_cols = [c for c in ("GLD", "DBC") if c in w.columns]

        trend = data["SPY"].rolling(ma_window).mean()
        ma_slope = trend.diff(accel_window)
        # fill_value keeps the series boolean; warm-up rows count as "not rising".
        rising = (ma_slope > 0).shift(2, fill_value=False).astype(bool).to_numpy()[: len(w)]

        ron_w = w[risk_on_cols].sum(axis=1, skipna=False).to_numpy()
        veto = (ron_w > 0.01) & ~rising
        if not veto.any():
            return w

        w.loc[veto, risk_on_cols] = 0.0
        if risk_off_cols:
            w.loc[veto, risk_off_cols[0]] = ron_w[veto]
        return w
    return enhancer
def vix_mean_revert_entry(vix_spike=30, lookback=5):
    """After VIX spike + revert, force risk-on. Whaley (2009) mean-reversion.

    Args:
        vix_spike: Level the previous-bar VIX must have exceeded at some
            point within the look-back window.
        lookback: Window length, in bars, for both the spike test and the
            average the current VIX must be below.

    Returns:
        An ``enhancer(w, data)`` callable. On signal rows, risk-off weight
        (GLD/DBC) is moved to the first available risk-on column (TQQQ, then
        UPRO). ``w`` is edited in place and returned. It is returned
        untouched when ``data`` has no ``^VIX`` column, or when ``w`` has no
        risk-on column to move into — liquidating the defensive position
        with no destination would silently leave the portfolio in cash.
        Rows of ``w`` and ``data`` are matched by position.
    """
    def enhancer(w, data):
        if "^VIX" not in data.columns:
            return w
        risk_on_cols = [c for c in ("TQQQ", "UPRO") if c in w.columns]
        risk_off_cols = [c for c in ("GLD", "DBC") if c in w.columns]
        if not risk_on_cols or not risk_off_cols:
            return w

        # Previous-bar VIX; the leading gap is filled with a neutral 20.
        vix = data["^VIX"].shift(1).fillna(20)
        recent = vix.rolling(lookback)
        vix_was_high = recent.max() > vix_spike
        vix_now_falling = vix < recent.mean()
        buy_signal = (vix_was_high & vix_now_falling).to_numpy()[: len(w)]

        roff_w = w[risk_off_cols].sum(axis=1, skipna=False).to_numpy()
        flip = (roff_w > 0.01) & buy_signal
        if not flip.any():
            return w

        w.loc[flip, risk_off_cols] = 0.0
        w.loc[flip, risk_on_cols[0]] = roff_w[flip]
        return w
    return enhancer
def combined_enhancer(*enhancers):
    """Compose enhancers into one, applied left to right.

    Each enhancer receives the weights returned by the previous one together
    with the same ``data``; with no enhancers the weights pass through
    unchanged.
    """
    def enhancer(w, data):
        current = w
        pipeline = iter(enhancers)
        while True:
            step = next(pipeline, None)
            if step is None:
                return current
            current = step(current, data)
    return enhancer
# =========================================================================
# Main
# =========================================================================
def main():
    """Backtest every literature-inspired variant and print ranking tables.

    Loads the ETF universe through ``data_manager.update``, trims it to the
    last ``YEARS`` years, runs each ``V7Enhanced`` configuration through
    ``backtest`` and prints one summary line per run, followed by a table
    ranked by Sharpe ratio and the top five runs by annualised return.
    Variants that need ``^VIX`` or ``QQQ`` are skipped when that column is
    missing from the loaded data.
    """
    print("=" * 100)
    print(" LITERATURE-INFORMED ALPHA RESEARCH")
    print("=" * 100)

    all_etfs = sorted({
        "SPY", "QQQ", "TQQQ", "UPRO", "GLD", "DBC", "SHY", "TLT",
        "^VIX",
    })
    data = data_manager.update("etfs", all_etfs, with_open=False)
    if isinstance(data, tuple):
        data = data[0]
    if len(data.index) == 0:
        # Without bars there is no last date to anchor the look-back window on.
        print(" No price data returned; nothing to backtest.")
        return

    cutoff = data.index[-1] - pd.DateOffset(years=YEARS)
    data = data[data.index >= cutoff]
    has_vix = "^VIX" in data.columns
    has_qqq = "QQQ" in data.columns
    print(f"Period: {data.index[0].date()} → {data.index[-1].date()}")
    print(f"VIX available: {has_vix}, QQQ available: {has_qqq}")

    results = []

    def run(label, strategy):
        # Backtest one configuration, remember its metrics and echo a summary.
        eq = backtest(strategy, data, initial_capital=CAPITAL,
                      transaction_cost=TX_COST, fixed_fee=FIXED_FEE)
        m = metrics.raw_summary(eq)
        results.append((label, m))
        print(f" {label:<55} Ann={m['annualizedReturn']*100:>5.1f}% "
              f"Sharpe={m['sharpeRatio']:.2f} MaxDD={m['maxDrawdown']*100:.1f}% "
              f"Calmar={m['calmarRatio']:.2f}")

    # Baseline
    print("\n--- Baseline ---")
    run("V7+VT36 baseline", V7Enhanced())

    # === Idea 1: VIX overlay ===
    print("\n--- Idea 1: VIX regime overlay (Simon & Campasano 2014) ---")
    if has_vix:
        for hi in (20, 25, 30):
            run(f"VIX overlay (force off >VIX{hi})",
                V7Enhanced(regime_enhancer=vix_overlay(hi)))
    else:
        print(" VIX not available")

    # === Idea 2: Kelly sizing ===
    print("\n--- Idea 2: Kelly-optimal sizing (Kelly 1956, Thorp 2006) ---")
    run("Kelly sizing", V7Enhanced(sizing_model="kelly"))
    if has_vix:
        # Without VIX this would silently repeat the plain Kelly run under a
        # label that claims an overlay.
        run("Kelly + VIX>25", V7Enhanced(sizing_model="kelly",
                                         regime_enhancer=vix_overlay(25)))

    # === Idea 3: Multi-timeframe voting ===
    print("\n--- Idea 3: Multi-MA voting (Faber 2007) ---")
    run("Multi-MA 2/3 (50,150,200)", V7Enhanced(regime_enhancer=multi_timeframe()))
    run("Multi-MA 3/3 (all agree)",
        V7Enhanced(regime_enhancer=multi_timeframe(min_agree=3)))

    # === Idea 4: Cross-asset confirmation ===
    print("\n--- Idea 4: Cross-asset (Asness et al. 2013) ---")
    if has_qqq:
        run("SPY+QQQ both bullish", V7Enhanced(regime_enhancer=cross_asset_confirm()))
    else:
        print(" QQQ not available")

    # === Idea 5: Momentum acceleration ===
    print("\n--- Idea 5: Momentum acceleration (Moskowitz et al. 2012) ---")
    for accel in (10, 20, 40):
        run(f"MA150 slope rising ({accel}d)",
            V7Enhanced(regime_enhancer=momentum_accel(accel)))

    # === Idea 6: VIX mean-reversion entry ===
    print("\n--- Idea 6: VIX mean-reversion entry (Whaley 2009) ---")
    if has_vix:
        for spike in (25, 30, 35):
            run(f"VIX spike>{spike} + revert → buy",
                V7Enhanced(regime_enhancer=vix_mean_revert_entry(spike)))
    else:
        print(" VIX not available")

    # === Idea 7: Vol-adaptive PT ===
    print("\n--- Idea 7: Vol-drag-aware PT (optimal stopping theory) ---")
    run("Vol-adaptive PT (base=30%)", V7Enhanced(pt_model="vol_adaptive"))
    run("Vol-adaptive PT (base=35%)",
        V7Enhanced(pt_model="vol_adaptive", pt_threshold=0.35))

    # === Idea 8: Combined best ideas ===
    print("\n--- Idea 8: Combinations ---")
    if has_vix:
        run("VIX>25 + multi-MA 2/3",
            V7Enhanced(regime_enhancer=combined_enhancer(
                vix_overlay(25), multi_timeframe())))
        if has_qqq:
            # Previously a missing QQQ column passed None to backtest().
            run("VIX>25 + cross-asset",
                V7Enhanced(regime_enhancer=combined_enhancer(
                    vix_overlay(25), cross_asset_confirm())))
        run("VIX>30 + accel(20d)",
            V7Enhanced(regime_enhancer=combined_enhancer(
                vix_overlay(30), momentum_accel(20))))
        # VIX mean-revert + normal V3
        run("V7 + VIX mean-revert entry (>30)",
            V7Enhanced(regime_enhancer=vix_mean_revert_entry(30)))
    else:
        print(" VIX not available")

    # === Idea 9: Different MA for V3 regime ===
    print("\n--- Idea 9: Alternative MA windows ---")
    for ma in (100, 120, 130, 150, 170, 200):
        run(f"V3 MA{ma} + VT36", V7Enhanced(ma_long=ma))

    # Final ranking
    by_sharpe = sorted(results, key=lambda x: x[1]["sharpeRatio"], reverse=True)
    print(f"\n{'=' * 110}")
    print(" FINAL RANKING (by Sharpe)")
    print(f"{'=' * 110}")
    print(f"{'#':<4} {'Strategy':<55} {'Ann%':>6} {'Vol%':>6} {'Sharpe':>7} "
          f"{'Sortino':>8} {'MaxDD%':>7} {'Calmar':>7}")
    print("-" * 110)
    for rank, (label, m) in enumerate(by_sharpe, 1):
        # Flag the podium; both branches used to be empty strings.
        marker = " *" if rank <= 3 else ""
        print(f"{rank:<4} {label:<55} "
              f"{m['annualizedReturn']*100:>5.1f}% "
              f"{m['annualizedVolatility']*100:>5.1f}% "
              f"{m['sharpeRatio']:>7.2f} {m['sortinoRatio']:>8.2f} "
              f"{m['maxDrawdown']*100:>6.1f}% {m['calmarRatio']:>7.2f}{marker}")
    print(f"{'=' * 110}")

    # Top by Ann Return
    by_return = sorted(results, key=lambda x: x[1]["annualizedReturn"], reverse=True)
    print(f"\n Top 5 by Ann Return:")
    for rank, (label, m) in enumerate(by_return[:5], 1):
        print(f" {rank}. {label:<50} Ann={m['annualizedReturn']*100:.1f}% "
              f"Sharpe={m['sharpeRatio']:.2f}")
# Run the research sweep only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()