# quant/research/v7_literature_alpha.py

"""Literature-informed alpha research: can we beat V7+VT36?

Grounded in specific academic/industry research:

1. VIX regime overlay — Simon & Campasano (2014): VIX level as exogenous fear signal
2. Kelly-optimal sizing — Kelly (1956), Thorp (2006): return-aware position sizing
3. Multi-timeframe voting — Faber (2007): multiple MAs reduce false signals
4. Cross-asset confirmation — Asness et al. (2013): correlated asset agreement
5. Momentum acceleration — Moskowitz et al. (2012): 2nd derivative of trend
6. VIX mean-reversion entry — Whaley (2009): buy panic, sell complacency
7. Carry-enhanced risk-off — Koijen et al. (2018): hold yield during defensive periods
8. Regime-dependent PT — Optimal stopping theory: vol-drag-aware thresholds
"""
from __future__ import annotations

import sys
sys.path.insert(0, ".")

import numpy as np
import pandas as pd

import data_manager
import metrics
from strategies.base import Strategy
from strategies.permanent import TrendRiderV3
from strategies.trend_rider_v7 import TrendRiderV7
from main import backtest

# Backtest configuration shared by every run launched from main().
YEARS = 10  # length, in years, of the trailing window kept from the loaded price data
CAPITAL = 100_000  # handed to backtest() as initial_capital
TX_COST = 0.001  # handed to backtest() as transaction_cost; presumably a proportional rate — confirm in main.backtest
FIXED_FEE = 2.0  # handed to backtest() as fixed_fee; presumably a per-trade charge — confirm in main.backtest


class V7Enhanced(Strategy):
    """V7 with pluggable regime enhancer and sizing model.

    The pipeline run by :meth:`generate_signals` is:

    1. base weights from a ``TrendRiderV3`` (SPY signal, TQQQ/UPRO risk-on,
       GLD/DBC risk-off);
    2. optional ``regime_enhancer(w, data)`` that may rewrite those weights;
    3. a lagged exposure multiplier (volatility target or clipped Kelly);
    4. a profit-take overlay that parks the position in ``pt_park`` after a
       large enough gain on a risk-on holding.
    """

    def __init__(
        self,
        regime_enhancer=None,
        sizing_model="vol_target",
        pt_model="fixed",
        target_vol=0.36, min_lev=0.75, max_lev=1.0,
        pt_threshold=0.30, pt_band=0.10, pt_park="SHY",
        ma_long=150, **v3_kw,
    ):
        """Store the overlay configuration and build the underlying V3 strategy.

        Args:
            regime_enhancer: Optional callable ``(w, data) -> w`` applied to the
                V3 weights before sizing.
            sizing_model: ``"kelly"`` selects the clipped Kelly multiplier; any
                other value selects volatility targeting.
            pt_model: ``"vol_adaptive"`` scales the profit-take threshold by
                realized volatility; any other value uses the fixed threshold.
            target_vol: Annualized volatility target for volatility sizing.
            min_lev: Lower clip of the exposure multiplier.
            max_lev: Upper clip of the exposure multiplier.
            pt_threshold: Gain that triggers the profit-take; ``<= 0`` disables
                the overlay entirely.
            pt_band: Hysteresis width; the fixed re-entry level is
                ``pt_threshold - pt_band``.
            pt_park: Column that receives the exposure while profit-taken.
            ma_long: Forwarded to ``TrendRiderV3``.
            **v3_kw: Extra keyword arguments forwarded to ``TrendRiderV3``.
        """
        self.regime_enhancer = regime_enhancer
        self.sizing_model = sizing_model
        self.pt_model = pt_model
        self.target_vol = target_vol
        self.min_lev = min_lev
        self.max_lev = max_lev
        self.pt_threshold = pt_threshold
        self.pt_band = pt_band
        self.pt_park = pt_park
        self.v3 = TrendRiderV3(
            signal="SPY", risk_on=("TQQQ", "UPRO"), risk_off=("GLD", "DBC"),
            ma_long=ma_long, **v3_kw,
        )

    def generate_signals(self, data):
        """Return the weight frame after enhancement, sizing and profit-take.

        Args:
            data: Price frame with one column per symbol.

        Returns:
            DataFrame of weights, one column per tradable symbol.
        """
        w = self.v3.generate_signals(data)
        # Make sure the parking asset has a column so the overlay can write to it.
        if self.pt_park and self.pt_park in data.columns and self.pt_park not in w.columns:
            w[self.pt_park] = 0.0

        # Regime enhancement: override V3's decision in specific conditions
        if self.regime_enhancer:
            w = self.regime_enhancer(w, data)

        # Returns of the unscaled portfolio drive both sizing and adaptive PT.
        daily_ret = data.pct_change(fill_method=None).fillna(0.0)
        common = w.columns.intersection(daily_ret.columns)
        port_rets = (w[common] * daily_ret[common]).sum(axis=1)

        scale = self._position_scale(port_rets)
        w = w.mul(scale, axis=0)

        if self.pt_threshold <= 0:
            return w
        return self._apply_profit_take(w, data, scale, port_rets)

    def _position_scale(self, port_rets):
        """Return the one-bar-lagged exposure multiplier for the sizing model."""
        window = port_rets.rolling(60, min_periods=21)
        if self.sizing_model == "kelly":
            # Kelly: scale = E[r] / Var[r], clipped
            roll_mean = window.mean() * 252
            roll_var = window.var() * 252
            kelly_f = (roll_mean / roll_var.clip(lower=0.01)).clip(-1, 2)
            scale = kelly_f.clip(lower=self.min_lev, upper=self.max_lev)
        else:
            realized_vol = window.std() * np.sqrt(252)
            scale = (self.target_vol / realized_vol).clip(
                lower=self.min_lev, upper=self.max_lev)
        # Lag one bar so bar i is sized from information through bar i-1;
        # bars without an estimate run at full exposure.
        return scale.shift(1).fillna(1.0)

    def _apply_profit_take(self, w, data, scale, port_rets):
        """Park the exposure once a risk-on holding has gained enough.

        The held symbol is the column with the largest weight. Its entry price
        is the previous bar's price on the bar where the held symbol changes.
        On later bars the gain is measured from that entry to the previous
        bar's price; once it reaches the threshold the row is zeroed and the
        bar's scale is written to the parking column (if present) until the
        gain falls back below the re-entry level.
        """
        risk_on_set = set(self.v3.risk_on)
        held = w.idxmax(axis=1)
        max_w = w.max(axis=1)
        held[max_w < 1e-8] = ""
        park_col = self.pt_park if self.pt_park in w.columns else ""
        ep, cs, stopped = None, None, False
        rl = self.pt_threshold - self.pt_band

        vol_adaptive = self.pt_model == "vol_adaptive"
        if vol_adaptive:
            # PT threshold inversely proportional to vol drag
            # Vol drag ≈ leverage² × σ² / 2; for 3x: 9σ²/2
            # Optimal PT ≈ base / (1 + k * σ²)
            # Lagged one bar: the estimate for bar i must not contain bar i's
            # own return, matching the lag applied to the sizing multiplier.
            realized_vol_arr = (
                port_rets.rolling(60, min_periods=21).std() * np.sqrt(252)
            ).shift(1).to_numpy()

        for i in range(len(w)):
            sym = held.iloc[i]
            if not sym or max_w.iloc[i] < 1e-8:
                # Flat: forget the tracked position.
                cs, ep, stopped = None, None, False
                continue
            if sym != cs:
                # New holding: record the entry price from the previous bar.
                cs = sym
                ep = float(data[sym].iloc[i-1]) if i > 0 and sym in data.columns else None
                stopped = False
                continue
            if sym not in risk_on_set:
                continue
            if ep is None or ep <= 0 or sym not in data.columns:
                continue
            y = float(data[sym].iloc[i-1]) if i > 0 else float(data[sym].iloc[i])
            g = y / ep - 1.0

            if vol_adaptive:
                rv = realized_vol_arr[i]
                if np.isnan(rv):
                    rv = 0.25
                # Higher vol → lower threshold (take profits faster)
                t = self.pt_threshold * (0.25 / max(rv, 0.10))
                t = np.clip(t, 0.15, 0.50)
                # Keep the re-entry level at the same fraction of the threshold.
                r = t * (1 - self.pt_band / self.pt_threshold)
            else:
                t = self.pt_threshold
                r = rl

            if stopped:
                if g < r:
                    stopped = False
                else:
                    w.iloc[i] = 0.0
                    if park_col:
                        w.at[w.index[i], park_col] = scale.iloc[i]
            elif g >= t:
                stopped = True
                w.iloc[i] = 0.0
                if park_col:
                    w.at[w.index[i], park_col] = scale.iloc[i]
        return w


# =========================================================================
# Regime enhancers
# =========================================================================

def vix_overlay(vix_high=25, vix_low=15):
    """Force risk-off when VIX > threshold. Simon & Campasano (2014).

    Args:
        vix_high: Rows whose previous-bar VIX is strictly above this level have
            their risk-on weight moved out.
        vix_low: Accepted for interface compatibility; the overlay does not
            use it.

    Returns:
        ``enhancer(w, data)``. It edits ``w`` in place and returns it. On
        triggered rows the TQQQ/UPRO weights are zeroed and their sum is
        written to the first of GLD/DBC present in ``w`` (overwriting that
        cell); with no risk-off column the weight is simply dropped. ``w`` is
        returned untouched when ``data`` has no ``^VIX`` column.
    """
    def enhancer(w, data):
        if "^VIX" not in data.columns:
            return w
        on_pos = [w.columns.get_loc(c) for c in ("TQQQ", "UPRO") if c in w.columns]
        off_pos = [w.columns.get_loc(c) for c in ("GLD", "DBC") if c in w.columns]

        # Lag one bar so row i only sees the prior VIX; gaps are filled with 20.
        prev_vix = data["^VIX"].shift(1).fillna(20).to_numpy()[:len(w)]
        on_weight = w.iloc[:, on_pos].to_numpy(dtype=float).sum(axis=1)

        rows = np.flatnonzero((on_weight > 0.01) & (prev_vix > vix_high))
        if rows.size:
            w.iloc[rows, on_pos] = 0.0
            if off_pos:
                w.iloc[rows, off_pos[0]] = on_weight[rows]
        return w
    return enhancer


def multi_timeframe(windows=(50, 150, 200), min_agree=2):
    """Moving-average voting filter, after Faber (2007).

    Every window in ``windows`` casts one bullish vote when SPY is above its
    rolling mean of that length. The vote count is lagged two bars. Rows that
    hold risk-on weight while the lagged count is below ``min_agree`` get the
    TQQQ/UPRO weights zeroed and their sum written to the first of GLD/DBC
    present in ``w``. The returned enhancer edits ``w`` in place and returns it.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns:
            return w
        price = data["SPY"]
        ballots = pd.DataFrame(index=data.index)
        for span in windows:
            ballots[f"ma{span}"] = (price > price.rolling(span).mean()).astype(int)
        # PIT: shift 2 to match V3
        tally = ballots.sum(axis=1).shift(2).to_numpy()[:len(w)]

        on_pos = [w.columns.get_loc(c) for c in ("TQQQ", "UPRO") if c in w.columns]
        off_pos = [w.columns.get_loc(c) for c in ("GLD", "DBC") if c in w.columns]
        on_weight = w.iloc[:, on_pos].to_numpy(dtype=float).sum(axis=1)

        # NaN tallies (the first two bars) compare False and leave the row alone.
        rows = np.flatnonzero((on_weight > 0.01) & (tally < min_agree))
        if rows.size:
            w.iloc[rows, on_pos] = 0.0
            if off_pos:
                w.iloc[rows, off_pos[0]] = on_weight[rows]
        return w
    return enhancer


def cross_asset_confirm():
    """Require both SPY and QQQ trends to agree. Asness et al. (2013).

    Returns:
        ``enhancer(w, data)``. A symbol counts as bullish when its price is
        above its 150-bar rolling mean, lagged two bars (the first two bars
        count as not bullish). Rows holding risk-on weight while SPY and QQQ
        are not both bullish get TQQQ/UPRO zeroed and their sum written to the
        first of GLD/DBC present in ``w``. ``w`` is edited in place and
        returned; it is returned untouched when SPY or QQQ is missing from
        ``data``.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns or "QQQ" not in data.columns:
            return w

        def bullish(symbol):
            price = data[symbol]
            # fill_value keeps the series boolean instead of passing through
            # an object column that would then need a deprecated downcast.
            return (price > price.rolling(150).mean()).shift(2, fill_value=False)

        both_bull = (bullish("SPY") & bullish("QQQ")).to_numpy(dtype=bool)[:len(w)]

        on_pos = [w.columns.get_loc(c) for c in ("TQQQ", "UPRO") if c in w.columns]
        off_pos = [w.columns.get_loc(c) for c in ("GLD", "DBC") if c in w.columns]
        on_weight = w.iloc[:, on_pos].to_numpy(dtype=float).sum(axis=1)

        rows = np.flatnonzero((on_weight > 0.01) & ~both_bull)
        if rows.size:
            w.iloc[rows, on_pos] = 0.0
            if off_pos:
                w.iloc[rows, off_pos[0]] = on_weight[rows]
        return w
    return enhancer


def momentum_accel(accel_window=20):
    """Only risk-on while the SPY 150-bar average is rising. Moskowitz et al. (2012).

    Args:
        accel_window: Number of bars over which the change of the 150-bar
            rolling mean of SPY is measured.

    Returns:
        ``enhancer(w, data)``. The filter passes when the 150-bar mean is
        higher than it was ``accel_window`` bars earlier, lagged two bars
        (bars without a value count as failing). Note this tests the sign of
        the average's change, not its second derivative. Rows holding risk-on
        weight while the filter fails get TQQQ/UPRO zeroed and their sum
        written to the first of GLD/DBC present in ``w``. ``w`` is edited in
        place and returned; it is returned untouched when SPY is missing.
    """
    def enhancer(w, data):
        if "SPY" not in data.columns:
            return w
        ma150 = data["SPY"].rolling(150).mean()
        ma_change = ma150.diff(accel_window)
        # fill_value keeps the lagged flag boolean without an object-dtype
        # detour and the deprecated fillna downcast.
        rising = (ma_change > 0).shift(2, fill_value=False).to_numpy(dtype=bool)[:len(w)]

        on_pos = [w.columns.get_loc(c) for c in ("TQQQ", "UPRO") if c in w.columns]
        off_pos = [w.columns.get_loc(c) for c in ("GLD", "DBC") if c in w.columns]
        on_weight = w.iloc[:, on_pos].to_numpy(dtype=float).sum(axis=1)

        rows = np.flatnonzero((on_weight > 0.01) & ~rising)
        if rows.size:
            w.iloc[rows, on_pos] = 0.0
            if off_pos:
                w.iloc[rows, off_pos[0]] = on_weight[rows]
        return w
    return enhancer


def vix_mean_revert_entry(vix_spike=30, lookback=5):
    """Force risk-on once a VIX spike starts to fade, after Whaley (2009).

    Works on the one-bar-lagged VIX (gaps filled with 20). The buy signal is
    on when the ``lookback``-bar rolling maximum exceeds ``vix_spike`` and the
    lagged VIX is below its ``lookback``-bar rolling mean. Rows holding
    risk-off weight while the signal is on get GLD/DBC zeroed and their sum
    written to the first of TQQQ/UPRO present in ``w``. The returned enhancer
    edits ``w`` in place and returns it.
    """
    def enhancer(w, data):
        if "^VIX" not in data.columns:
            return w
        lagged = data["^VIX"].shift(1).fillna(20)
        recent = lagged.rolling(lookback)
        spiked = recent.max() > vix_spike
        fading = lagged < recent.mean()
        go_long = (spiked & fading).to_numpy()[:len(w)]

        on_pos = [w.columns.get_loc(c) for c in ("TQQQ", "UPRO") if c in w.columns]
        off_pos = [w.columns.get_loc(c) for c in ("GLD", "DBC") if c in w.columns]
        off_weight = w.iloc[:, off_pos].to_numpy(dtype=float).sum(axis=1)

        rows = np.flatnonzero((off_weight > 0.01) & go_long)
        if rows.size:
            w.iloc[rows, off_pos] = 0.0
            if on_pos:
                w.iloc[rows, on_pos[0]] = off_weight[rows]
        return w
    return enhancer


def combined_enhancer(*enhancers):
    """Compose enhancers into one, applied left to right.

    Each enhancer receives the weights returned by the previous one together
    with the same ``data``; with no enhancers the weights pass through as-is.
    """
    def enhancer(w, data):
        current = w
        for step in enhancers:
            current = step(current, data)
        return current
    return enhancer


# =========================================================================
# Main
# =========================================================================

def main():
    """Run every research variant over the trailing window and print rankings.

    Loads the ETF and VIX series through ``data_manager.update``, keeps the
    last ``YEARS`` years, backtests the baseline and each idea with the
    module-level cost settings, then prints a table ranked by Sharpe ratio
    and the top five by annualized return. Variants that need VIX or QQQ are
    skipped when that column is missing from the loaded data.
    """
    print("=" * 100)
    print("  LITERATURE-INFORMED ALPHA RESEARCH")
    print("=" * 100)

    all_etfs = sorted({
        "SPY", "QQQ", "TQQQ", "UPRO", "GLD", "DBC", "SHY", "TLT",
        "^VIX",
    })
    data = data_manager.update("etfs", all_etfs, with_open=False)
    # update() may hand back a tuple; the first element is used as the frame.
    if isinstance(data, tuple):
        data = data[0]
    cutoff = data.index[-1] - pd.DateOffset(years=YEARS)
    data = data[data.index >= cutoff]

    has_vix = "^VIX" in data.columns
    has_qqq = "QQQ" in data.columns
    print(f"Period: {data.index[0].date()} → {data.index[-1].date()}")
    print(f"VIX available: {has_vix}, QQQ available: {has_qqq}")

    results = []

    def run(label, strategy):
        """Backtest one strategy, record its metrics and print a summary line."""
        eq = backtest(strategy, data, initial_capital=CAPITAL,
                      transaction_cost=TX_COST, fixed_fee=FIXED_FEE)
        m = metrics.raw_summary(eq)
        results.append((label, m))
        print(f"  {label:<55} Ann={m['annualizedReturn']*100:>5.1f}%  "
              f"Sharpe={m['sharpeRatio']:.2f}  MaxDD={m['maxDrawdown']*100:.1f}%  "
              f"Calmar={m['calmarRatio']:.2f}")

    # Baseline
    print("\n--- Baseline ---")
    run("V7+VT36 baseline", V7Enhanced())

    # === Idea 1: VIX overlay ===
    print("\n--- Idea 1: VIX regime overlay (Simon & Campasano 2014) ---")
    if has_vix:
        for hi in (20, 25, 30):
            run(f"VIX overlay (force off >VIX{hi})", V7Enhanced(regime_enhancer=vix_overlay(hi)))
    else:
        print("  VIX not available")

    # === Idea 2: Kelly sizing ===
    print("\n--- Idea 2: Kelly-optimal sizing (Kelly 1956, Thorp 2006) ---")
    run("Kelly sizing", V7Enhanced(sizing_model="kelly"))
    run("Kelly + VIX>25", V7Enhanced(sizing_model="kelly",
        regime_enhancer=vix_overlay(25) if has_vix else None))

    # === Idea 3: Multi-timeframe voting ===
    print("\n--- Idea 3: Multi-MA voting (Faber 2007) ---")
    run("Multi-MA 2/3 (50,150,200)", V7Enhanced(regime_enhancer=multi_timeframe()))
    run("Multi-MA 3/3 (all agree)", V7Enhanced(regime_enhancer=multi_timeframe(min_agree=3)))

    # === Idea 4: Cross-asset confirmation ===
    print("\n--- Idea 4: Cross-asset (Asness et al. 2013) ---")
    if has_qqq:
        run("SPY+QQQ both bullish", V7Enhanced(regime_enhancer=cross_asset_confirm()))

    # === Idea 5: Momentum acceleration ===
    print("\n--- Idea 5: Momentum acceleration (Moskowitz et al. 2012) ---")
    for win in (10, 20, 40):
        run(f"MA150 slope rising ({win}d)", V7Enhanced(regime_enhancer=momentum_accel(win)))

    # === Idea 6: VIX mean-reversion entry ===
    print("\n--- Idea 6: VIX mean-reversion entry (Whaley 2009) ---")
    if has_vix:
        for spike in (25, 30, 35):
            run(f"VIX spike>{spike} + revert → buy",
                V7Enhanced(regime_enhancer=vix_mean_revert_entry(spike)))

    # === Idea 7: Vol-adaptive PT ===
    print("\n--- Idea 7: Vol-drag-aware PT (optimal stopping theory) ---")
    run("Vol-adaptive PT (base=30%)", V7Enhanced(pt_model="vol_adaptive"))
    run("Vol-adaptive PT (base=35%)", V7Enhanced(pt_model="vol_adaptive", pt_threshold=0.35))

    # === Idea 8: Combined best ideas ===
    print("\n--- Idea 8: Combinations ---")
    if has_vix:
        run("VIX>25 + multi-MA 2/3",
            V7Enhanced(regime_enhancer=combined_enhancer(
                vix_overlay(25), multi_timeframe())))
        # Needs QQQ as well; skip the run rather than hand backtest() a
        # None strategy when the column is missing.
        if has_qqq:
            run("VIX>25 + cross-asset",
                V7Enhanced(regime_enhancer=combined_enhancer(
                    vix_overlay(25), cross_asset_confirm())))
        run("VIX>30 + accel(20d)",
            V7Enhanced(regime_enhancer=combined_enhancer(
                vix_overlay(30), momentum_accel(20))))
        # VIX mean-revert + normal V3
        run("V7 + VIX mean-revert entry (>30)",
            V7Enhanced(regime_enhancer=vix_mean_revert_entry(30)))

    # === Idea 9: Different MA for V3 regime ===
    print("\n--- Idea 9: Alternative MA windows ---")
    for ma in (100, 120, 130, 150, 170, 200):
        run(f"V3 MA{ma} + VT36", V7Enhanced(ma_long=ma))

    # Final ranking
    results.sort(key=lambda x: x[1]["sharpeRatio"], reverse=True)
    print(f"\n{'=' * 110}")
    print("  FINAL RANKING (by Sharpe)")
    print(f"{'=' * 110}")
    print(f"{'#':<4} {'Strategy':<55} {'Ann%':>6} {'Vol%':>6} {'Sharpe':>7} "
          f"{'Sortino':>8} {'MaxDD%':>7} {'Calmar':>7}")
    print("-" * 110)
    for i, (label, m) in enumerate(results, 1):
        marker = " ★" if i <= 3 else ""
        print(f"{i:<4} {label:<55} "
              f"{m['annualizedReturn']*100:>5.1f}% "
              f"{m['annualizedVolatility']*100:>5.1f}% "
              f"{m['sharpeRatio']:>7.2f} {m['sortinoRatio']:>8.2f} "
              f"{m['maxDrawdown']*100:>6.1f}% {m['calmarRatio']:>7.2f}{marker}")
    print(f"{'=' * 110}")

    # Top by Ann Return
    results.sort(key=lambda x: x[1]["annualizedReturn"], reverse=True)
    print("\n  Top 5 by Ann Return:")
    for i, (label, m) in enumerate(results[:5], 1):
        print(f"  {i}. {label:<50} Ann={m['annualizedReturn']*100:.1f}%  "
              f"Sharpe={m['sharpeRatio']:.2f}")


# Run the research sweep only when this file is executed as a script.
if __name__ == "__main__":
    main()