# quant/research/v7_synthetic_leverage_eval.py

"""Direction 2: V7 regime + synthetic 2x/3x leveraged individual stocks.

Hypothesis: replacing TQQQ/UPRO with synthetic 2x-leveraged top-momentum
S&P 500 stocks could beat V7 by combining stock-picking alpha with leverage.

Synthetic leverage model:
  daily_return_Nx = N * stock_daily_return - (N-1) * daily_borrow_cost
  daily_borrow_cost ≈ risk_free_rate / 252 (conservative: 5% annualized)

This captures:
  - Leverage amplification
  - Financing cost
  - Volatility drag (emerges naturally from daily compounding of leveraged returns)

Variants tested (see main()):
  - V7+VT36 baseline on real TQQQ/UPRO (current SOTA)
  - V7 regime + synth 2x momentum, top-1/3/5/10
  - V7 regime + synth 2x recovery-momentum, top-3/5/10
  - V7 regime + synth 3x momentum, top-1/3/5 (compare to real TQQQ)
  - V7 regime + synth 2x momentum, top-3/5, without vol-target
  - V7 regime + synth 2x momentum, top-5, profit-take at 40%/50% and without profit-take
  - SPY buy-and-hold benchmark
"""
from __future__ import annotations

import sys
sys.path.insert(0, ".")

import numpy as np
import pandas as pd

import data_manager
import metrics
import universe_history as uh
from main import backtest
from strategies.base import Strategy
from strategies.permanent import TrendRiderV3
from universe import UNIVERSES

YEARS = 10          # backtest window in years, counted back from the last bar
CAPITAL = 100_000   # passed to backtest() as initial_capital
TX_COST = 0.001     # passed to backtest() as transaction_cost
FIXED_FEE = 2.0     # passed to backtest() as fixed_fee
BORROW_RATE = 0.05  # 5% annualized


# ---------------------------------------------------------------------------
# Synthetic leveraged returns
# ---------------------------------------------------------------------------

def synthetic_leveraged_prices(prices: pd.DataFrame, leverage: float,
                               borrow_rate: float = BORROW_RATE) -> pd.DataFrame:
    """Create synthetic leveraged price series from daily returns.

    Models daily-rebalanced leverage: each day's return is
      r_lev = leverage * r_stock - (leverage - 1) * r_borrow
    where r_borrow = borrow_rate / 252.

    This captures vol drag naturally (daily compounding of amplified returns).

    Args:
        prices: Price panel (rows = dates, columns = tickers). NaN marks a
            bar on which the ticker has no usable price.
        leverage: Leverage multiple applied to each daily return.
        borrow_rate: Annualized financing rate charged on the borrowed
            ``leverage - 1`` fraction.

    Returns:
        A frame shaped like ``prices``. Each column equals 100 at its first
        valid bar and compounds the leveraged return afterwards. Bars where
        the input price is NaN stay NaN, so an upstream availability mask
        survives the transformation instead of being replaced by a
        fabricated price path.
    """
    # No fill: a return exists only when both today's and yesterday's prices
    # are present (so the return across a gap is not reconstructed).
    raw_ret = prices.pct_change(fill_method=None)
    has_ret = raw_ret.notna()

    daily_borrow = borrow_rate / 252
    # Charge financing only on bars that actually have a return; bars without
    # one (first bar, masked bars) contribute a neutral 0 to the compounding.
    lev_ret = (leverage * raw_ret - (leverage - 1) * daily_borrow).where(has_ret, 0.0)

    lev_prices = (1 + lev_ret).cumprod() * 100  # normalize to 100 start
    # Re-apply the input's NaN pattern so missing prices stay missing.
    return lev_prices.where(prices.notna())


# ---------------------------------------------------------------------------
# Strategy: V7 regime + synthetic leveraged stock picking
# ---------------------------------------------------------------------------

class V7SynthLeverage(Strategy):
    """V7 architecture with synthetic leveraged individual stocks as risk-on.

    Layer 1: V3 regime engine on SPY → risk-on vs risk-off
    Layer 2: Vol-target overlay
    Layer 3: Profit-take with hysteresis

    Risk-on: top-N stocks by momentum, synthetically leveraged, equal weight.
    Risk-off: momentum leader of (GLD, DBC).

    The strategy is meant to be run on a panel whose stock columns already
    contain synthetic leveraged prices and whose ETF columns contain real
    prices. ``leverage`` is stored on the instance but is not read by any
    method of this class.
    """

    # Minimum number of evaluated bars a regime is held before it may flip.
    _MIN_REGIME_BARS = 15
    # Lookback (bars) used to pick the defensive momentum leader.
    _DEFENSIVE_LOOKBACK = 63
    # Lower bound on the warm-up length before any signal is produced.
    _WARMUP_FLOOR = 150

    def __init__(
        self,
        stock_tickers: list[str],
        leverage: float = 2.0,
        top_n: int = 5,
        signal: str = "SPY",
        defensive: tuple[str, ...] = ("GLD", "DBC"),
        # Momentum ranking
        mom_lookback: int = 63,
        rebal_every: int = 21,
        # Selection method
        selection: str = "momentum",  # "momentum" or "recovery_momentum"
        recovery_window: int = 63,
        long_mom_lookback: int = 252,
        long_mom_skip: int = 21,
        # V3 regime
        ma_long: int = 150,
        # Vol-target
        target_vol: float = 0.36,
        vol_window: int = 60,
        min_lev: float = 0.75,
        max_lev: float = 1.0,
        # Profit-take
        pt_threshold: float = 0.30,
        pt_band: float = 0.10,
        pt_park: str = "SHY",
    ):
        """Store the configuration and build the embedded V3 regime engine.

        Args:
            stock_tickers: Candidate risk-on columns.
            leverage: Informational only (see class docstring).
            top_n: Maximum number of stocks held in risk-on.
            signal: Column whose closes drive the regime decision.
            defensive: Candidate risk-off columns.
            mom_lookback: Lookback (bars) for the "momentum" score.
            rebal_every: Bars between scheduled rebalances.
            selection: "momentum" or "recovery_momentum". Any other value
                is scored like "momentum" but without the positive-momentum
                filter.
            recovery_window: Rolling-min window for the recovery score.
            long_mom_lookback: Long momentum horizon (recovery mode).
            long_mom_skip: Most recent bars skipped by the long momentum.
            ma_long: Forwarded to ``TrendRiderV3``; not stored here.
            target_vol: Annualized volatility target of the overlay.
            vol_window: Rolling window (bars) for realized volatility.
            min_lev: Lower clip of the vol-target scale.
            max_lev: Upper clip of the vol-target scale.
            pt_threshold: Gain that triggers profit-take; ``<= 0`` disables it.
            pt_band: Hysteresis band; exposure is restored once the gain
                drops below ``pt_threshold - pt_band``.
            pt_park: Column that receives the exposure while stopped out.
        """
        self.stock_tickers = stock_tickers
        self.leverage = leverage
        self.top_n = top_n
        self.signal = signal
        self.defensive = defensive
        self.mom_lookback = mom_lookback
        self.rebal_every = rebal_every
        self.selection = selection
        self.recovery_window = recovery_window
        self.long_mom_lookback = long_mom_lookback
        self.long_mom_skip = long_mom_skip
        self.target_vol = target_vol
        self.vol_window = vol_window
        self.min_lev = min_lev
        self.max_lev = max_lev
        self.pt_threshold = pt_threshold
        self.pt_band = pt_band
        self.pt_park = pt_park
        # Only the engine's regime decision and window attributes are used
        # below; its own risk_on tickers are not referenced by this class.
        self._v3 = TrendRiderV3(
            signal=signal, risk_on=("TQQQ", "UPRO"),
            risk_off=defensive, ma_long=ma_long,
        )

    def _rank_stocks(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return the per-date selection score of each stock (higher = better).

        "recovery_momentum" blends, 50/50, the cross-sectional percentile
        ranks of (a) the rebound from the rolling ``recovery_window`` low and
        (b) the ``long_mom_lookback`` momentum that skips the most recent
        ``long_mom_skip`` bars. Any other ``selection`` value returns the
        plain ``mom_lookback`` percentage change.
        """
        avail = [t for t in self.stock_tickers if t in data.columns]
        panel = data[avail]

        if self.selection == "recovery_momentum":
            recovery = panel / panel.rolling(self.recovery_window).min() - 1
            momentum = panel.shift(self.long_mom_skip).pct_change(
                self.long_mom_lookback - self.long_mom_skip, fill_method=None,
            )
            rec_r = recovery.rank(axis=1, pct=True, na_option="keep")
            mom_r = momentum.rank(axis=1, pct=True, na_option="keep")
            return 0.5 * rec_r + 0.5 * mom_r

        return panel.pct_change(self.mom_lookback, fill_method=None)

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Build target weights for every column of ``data``.

        ``data`` is the combined panel the strategy is run on: synthetic
        leveraged stock columns plus real ETF columns. Scores are computed on
        these panel prices directly.

        The result is assembled in three phases:
          1. regime + stock selection, lagged by one bar;
          2. vol-target scaling of the lagged weights;
          3. optional profit-take with hysteresis.

        Returns:
            A weight frame aligned with ``data``. All zeros when the signal
            column is missing from ``data``.
        """
        if self.signal not in data.columns:
            return pd.DataFrame(0.0, index=data.index, columns=data.columns)

        # Phase 1: regime + selection (already shifted by one bar).
        raw_w = self._regime_weights(data)

        # Phase 2: Vol-target overlay
        daily_ret = data.pct_change(fill_method=None).fillna(0.0)
        port_rets = (raw_w * daily_ret).sum(axis=1)
        realized_vol = (
            port_rets.rolling(self.vol_window, min_periods=21).std() * np.sqrt(252)
        )
        scale = (self.target_vol / realized_vol).clip(
            lower=self.min_lev, upper=self.max_lev,
        )
        # Lag the scale one more bar; bars without an estimate run unscaled.
        scale = scale.shift(1).fillna(1.0)
        w = raw_w.mul(scale, axis=0)

        # Phase 3: Profit-take
        if self.pt_threshold <= 0:
            return w
        return self._apply_profit_take(data, w, scale)

    def _regime_weights(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return the one-bar-lagged regime/selection weights (phase 1)."""
        sig_arr = data[self.signal].to_numpy()
        avail_stocks = [t for t in self.stock_tickers if t in data.columns]
        avail_def = [t for t in self.defensive if t in data.columns]
        col_pos = {c: k for k, c in enumerate(data.columns)}

        score = self._rank_stocks(data)
        # Loop-invariant: defensive momentum for every bar, computed once.
        def_mom = (
            data[avail_def].pct_change(self._DEFENSIVE_LOOKBACK, fill_method=None)
            if avail_def else None
        )

        is_recovery = self.selection == "recovery_momentum"
        need = max(
            self._WARMUP_FLOOR,
            self.mom_lookback + 1,
            self._v3.vol_window + 1,
            self._v3.dd_window,
            self._v3.peak_window,
            self.long_mom_lookback + 1 if is_recovery else 0,
            self.recovery_window + 1 if is_recovery else 0,
        ) + 1

        # NaN rows mean "no decision on this bar"; they are forward-filled
        # from the last decision after the loop.
        raw = np.full(data.shape, np.nan)
        regime: str | None = None
        bars = 0

        for i in range(need, len(data)):
            closes = sig_arr[:i]  # signal history up to the previous bar
            if np.isnan(closes[-1]):
                continue

            desired = self._v3._desired_regime(closes, regime)
            changed = False
            if regime is None:
                regime, bars, changed = desired, 0, True
            else:
                bars += 1
                if desired != regime and bars >= self._MIN_REGIME_BARS:
                    regime, bars, changed = desired, 0, True

            # Re-decide only on a regime flip or on the rebalance schedule.
            if not changed and (i - need) % self.rebal_every != 0:
                continue

            raw[i, :] = 0.0
            if regime == "risk_on":
                dt = data.index[i]
                s = score.iloc[i][avail_stocks].dropna()
                # Only names with a price on this bar are eligible.
                valid = s.index[data.loc[dt, s.index].notna()]
                s = s[valid]
                if self.selection == "momentum":
                    s = s[s > 0]
                top = s.nlargest(min(self.top_n, len(s)))
                if len(top) > 0:
                    raw[i, [col_pos[t] for t in top.index]] = 1.0 / len(top)
                elif avail_def:
                    # No eligible stock: fall back to the first defensive.
                    raw[i, col_pos[avail_def[0]]] = 1.0
            elif avail_def:
                dm = def_mom.iloc[i].dropna()
                best = dm.idxmax() if len(dm) > 0 else avail_def[0]
                raw[i, col_pos[best]] = 1.0

        raw_w = pd.DataFrame(raw, index=data.index, columns=data.columns)
        raw_w = raw_w.ffill().fillna(0.0)
        # A decision made on bar i becomes the weight of bar i + 1.
        return raw_w.shift(1).fillna(0.0)

    def _apply_profit_take(self, data: pd.DataFrame, w: pd.DataFrame,
                           scale: pd.Series) -> pd.DataFrame:
        """Apply the profit-take overlay to ``w`` in place and return it (phase 3).

        NOTE(review): the tracked symbol is ``w.idxmax(axis=1)``, i.e. a
        single column per bar. With several equally weighted holdings only
        that one column's gain is monitored, yet a trigger parks the whole
        row — confirm this is intended for top_n > 1.
        """
        park_col = self.pt_park if self.pt_park in data.columns else ""

        held = w.idxmax(axis=1)
        max_w = w.max(axis=1)
        held[max_w < 1e-8] = ""

        def park(pos: int) -> None:
            # Drop all exposure for the bar; if a park column exists it
            # receives that bar's vol-target scale as its weight.
            w.iloc[pos] = 0.0
            if park_col:
                w.at[w.index[pos], park_col] = scale.iloc[pos]

        entry_price: float | None = None
        current_sym: str | None = None
        is_stopped = False
        restore_level = self.pt_threshold - self.pt_band

        for i in range(len(w)):
            sym = held.iloc[i]
            if not sym or max_w.iloc[i] < 1e-8:
                # Flat: forget the tracked position.
                current_sym = None
                entry_price = None
                is_stopped = False
                continue

            if sym != current_sym:
                # New tracked symbol: entry reference is the previous close.
                current_sym = sym
                entry_price = (
                    float(data[sym].iloc[i - 1])
                    if i > 0 and sym in data.columns else None
                )
                is_stopped = False
                continue

            if entry_price is None or entry_price <= 0 or sym not in data.columns:
                continue

            yesterday = float(data[sym].iloc[i - 1]) if i > 0 else float(data[sym].iloc[i])
            gain = yesterday / entry_price - 1.0

            if is_stopped:
                if gain < restore_level:
                    is_stopped = False
                else:
                    park(i)
            else:
                if gain >= self.pt_threshold:
                    is_stopped = True
                    park(i)

        return w


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Run every strategy variant on the same data window and print a ranking.

    Steps: load and point-in-time-mask prices, build the synthetic 2x/3x
    stock panels, backtest each variant (a failing variant is reported and
    skipped), add an SPY buy-and-hold benchmark, then print all results
    sorted by annualized return.
    """
    print("=" * 95)
    print("  DIRECTION 2: V7 + SYNTHETIC LEVERAGED INDIVIDUAL STOCKS")
    print("=" * 95)

    # Load S&P 500 + PIT + ETFs
    print("\n[1] Loading data...")
    universe = UNIVERSES["us"]
    tickers = universe["fetch"]()
    pit_intervals = uh.load_sp500_history()
    hist_tickers = uh.all_tickers_ever(pit_intervals)
    etfs = ["SPY", "GLD", "DBC", "SHY", "TQQQ", "UPRO", "TLT"]
    all_tickers = sorted(set(tickers + hist_tickers + etfs))

    raw_data = data_manager.update("us", all_tickers, with_open=False)
    if isinstance(raw_data, tuple):
        raw_data = raw_data[0]
    cutoff = raw_data.index[-1] - pd.DateOffset(years=YEARS)
    raw_data = raw_data[raw_data.index >= cutoff]
    raw_data = uh.mask_prices(raw_data, pit_intervals)

    etf_set = set(etfs)
    stock_tickers = [t for t in raw_data.columns
                     if t not in etf_set and raw_data[t].notna().any()]
    print(f"   Stocks: {len(stock_tickers)}, Period: {raw_data.index[0].date()} → {raw_data.index[-1].date()}")

    # Build synthetic leveraged price panels
    print("\n[2] Building synthetic leveraged prices...")
    stock_prices = raw_data[stock_tickers]
    synth_2x = synthetic_leveraged_prices(stock_prices, 2.0)
    synth_3x = synthetic_leveraged_prices(stock_prices, 3.0)

    # Real ETF prices; tolerate ETFs that are missing from the download
    # instead of raising KeyError on the column selection.
    etf_prices = raw_data[[t for t in etfs if t in raw_data.columns]]

    def build_panel(synth: pd.DataFrame) -> pd.DataFrame:
        # Synthetic stocks + real ETFs, first occurrence wins on duplicates.
        panel = pd.concat([synth, etf_prices], axis=1)
        return panel.loc[:, ~panel.columns.duplicated()]

    # The panels do not depend on the variant, so build each one once.
    panel_2x = build_panel(synth_2x)
    panel_3x = build_panel(synth_3x)

    results: list[tuple[str, dict]] = []

    def run(label: str, strategy: Strategy, data_panel: pd.DataFrame):
        print(f"   {label}...", end=" ", flush=True)
        try:
            eq = backtest(strategy, data_panel, initial_capital=CAPITAL,
                          transaction_cost=TX_COST, fixed_fee=FIXED_FEE)
            m = metrics.raw_summary(eq)
            results.append((label, m))
            print(f"Ann={m['annualizedReturn']*100:.1f}%  Sharpe={m['sharpeRatio']:.2f}  "
                  f"MaxDD={m['maxDrawdown']*100:.1f}%")
        except Exception as e:
            # Deliberate best-effort: one failing variant must not abort the
            # whole comparison; it is reported and left out of the ranking.
            print(f"FAILED: {e}")

    # =====================================================================
    # Run variants
    # =====================================================================
    print("\n[3] Running strategies...")

    # --- V7+VT36 baseline (real TQQQ/UPRO) ---
    from strategies.trend_rider_v7 import TrendRiderV7
    etf_only = [t for t in ["SPY", "TQQQ", "UPRO", "GLD", "DBC", "SHY"] if t in etf_prices.columns]
    run("V7+VT36 baseline (TQQQ/UPRO)",
        TrendRiderV7(target_vol=0.36, min_lev=0.75),
        etf_prices[etf_only])

    # --- Synth 2x: momentum, various top-N ---
    for n in (1, 3, 5, 10):
        run(f"Synth 2x Mom top-{n} (VT36+PT30)",
            V7SynthLeverage(stock_tickers=stock_tickers, leverage=2.0,
                            top_n=n, target_vol=0.36, min_lev=0.75),
            panel_2x)

    # --- Synth 2x: recovery-momentum ---
    for n in (3, 5, 10):
        run(f"Synth 2x RecMom top-{n} (VT36+PT30)",
            V7SynthLeverage(stock_tickers=stock_tickers, leverage=2.0,
                            top_n=n, selection="recovery_momentum",
                            target_vol=0.36, min_lev=0.75),
            panel_2x)

    # --- Synth 3x: direct comparison with real TQQQ ---
    for n in (1, 3, 5):
        run(f"Synth 3x Mom top-{n} (VT36+PT30)",
            V7SynthLeverage(stock_tickers=stock_tickers, leverage=3.0,
                            top_n=n, target_vol=0.36, min_lev=0.75),
            panel_3x)

    # --- Synth 2x without vol-target (see if raw 2x stocks need less VT) ---
    for n in (3, 5):
        run(f"Synth 2x Mom top-{n} (no VT, PT30)",
            V7SynthLeverage(stock_tickers=stock_tickers, leverage=2.0,
                            top_n=n, target_vol=1.0, min_lev=1.0, max_lev=1.0),
            panel_2x)

    # --- Synth 2x with higher PT threshold (2x has less vol drag → let profits run) ---
    for pt in (0.40, 0.50):
        run(f"Synth 2x Mom top-5 (VT36+PT{int(pt*100)})",
            V7SynthLeverage(stock_tickers=stock_tickers, leverage=2.0,
                            top_n=5, target_vol=0.36, min_lev=0.75,
                            pt_threshold=pt, pt_band=pt*0.33),
            panel_2x)

    # --- Synth 2x: no profit-take (2x might not need it) ---
    run("Synth 2x Mom top-5 (VT36, no PT)",
        V7SynthLeverage(stock_tickers=stock_tickers, leverage=2.0,
                        top_n=5, target_vol=0.36, min_lev=0.75,
                        pt_threshold=0),
        panel_2x)

    # --- SPY benchmark ---
    # Skipped (rather than crashing the report) when SPY has no usable data.
    if "SPY" in raw_data.columns:
        spy = raw_data["SPY"].dropna()
        if not spy.empty:
            spy_eq = (spy / spy.iloc[0]) * CAPITAL
            results.append(("SPY benchmark", metrics.raw_summary(spy_eq)))

    # =====================================================================
    # Report
    # =====================================================================
    results.sort(key=lambda x: x[1]["annualizedReturn"], reverse=True)

    print(f"\n{'=' * 110}")
    print("  RANKING")
    print(f"{'=' * 110}")
    print(f"{'#':<4} {'Strategy':<45} {'Ann%':>7} {'Vol%':>7} {'Sharpe':>7} "
          f"{'Sortino':>8} {'MaxDD%':>7} {'Calmar':>7}")
    print("-" * 110)

    for i, (label, m) in enumerate(results, 1):
        marker = " ★" if i <= 3 else ""  # highlight the top three rows
        print(f"{i:<4} {label:<45} "
              f"{m['annualizedReturn']*100:>6.1f}% "
              f"{m['annualizedVolatility']*100:>6.1f}% "
              f"{m['sharpeRatio']:>7.2f} "
              f"{m['sortinoRatio']:>8.2f} "
              f"{m['maxDrawdown']*100:>6.1f}% "
              f"{m['calmarRatio']:>7.2f}{marker}")
    print(f"{'=' * 110}")


if __name__ == "__main__":
    main()