# quant/research/smart_dca_eval.py

"""
Smart DCA Strategy Evaluation — comprehensive comparison of DCA approaches.

Tests 6 DCA strategies across 4 ETFs (SPY, QQQ, TQQQ, UPRO) over 10 years.
Also tests a hybrid V7+DCA approach combining trend-following with smart DCA.

Usage: cd /home/gahow/projects/quant && uv run python research/smart_dca_eval.py
"""
from __future__ import annotations

import sys
sys.path.insert(0, ".")

import numpy as np
import pandas as pd

import data_manager
import metrics
from main import backtest
from strategies.trend_rider_v7 import TrendRiderV7


# ── Configuration ────────────────────────────────────────────────────────────

# Capital invested on the first trading day of each simulation (default
# ``initial_capital`` of the DCA and lump-sum runners, and the V7 backtest).
INITIAL_CAPITAL = 100_000
# Baseline monthly contribution in dollars: the fixed-DCA amount, the fallback
# when an indicator is unavailable, and the unit used for the value-averaging
# target and the lump-sum benchmark's capital.
MONTHLY_BASE = 1_000
# Inclusive label bounds used to slice the downloaded price frame.
START_DATE = "2016-01-01"
END_DATE = "2026-05-23"

# ETFs that each DCA strategy is evaluated on.
DCA_TICKERS = ["SPY", "QQQ", "TQQQ", "UPRO"]
# Tickers needed for V7 strategy + VIX for smart DCA
EXTRA_TICKERS = ["^VIX", "GLD", "DBC", "SHY"]


# ── Data Loading ─────────────────────────────────────────────────────────────

def load_data() -> pd.DataFrame:
    """Fetch the price frame for every ticker the evaluation needs.

    The frame returned by ``data_manager.update`` (requested without open
    prices) is sliced to ``START_DATE``..``END_DATE``. When a ``^VIX`` column
    is present it is exposed under the plain name ``VIX``.
    """
    tickers = DCA_TICKERS + EXTRA_TICKERS
    frame = data_manager.update("etfs", tickers, with_open=False)
    windowed = frame.loc[START_DATE:END_DATE]
    if "^VIX" not in windowed.columns:
        return windowed
    # Drop the caret so the column can be addressed as data["VIX"].
    return windowed.rename(columns={"^VIX": "VIX"})


# ── Helper: find first trading day of each month ─────────────────────────────

def monthly_schedule(dates: pd.DatetimeIndex) -> list[pd.Timestamp]:
    """Return the first date seen for each (year, month) pair in ``dates``.

    Dates are visited in index order, so for a chronologically sorted index
    this is the first trading day of every month. The result keeps the order
    in which the months were first encountered.
    """
    first_by_month: dict[tuple[int, int], pd.Timestamp] = {}
    for day in dates:
        # setdefault keeps the earliest-seen date and ignores later ones.
        first_by_month.setdefault((day.year, day.month), day)
    return list(first_by_month.values())


# ── Technical indicators ─────────────────────────────────────────────────────

def compute_rsi(prices: pd.Series, window: int = 14) -> pd.Series:
    """Relative Strength Index of ``prices``, smoothed with ``alpha = 1/window``.

    Up-moves and down-moves are averaged separately with an exponentially
    weighted mean that needs ``window`` observations before it produces a
    value; earlier rows are NaN. The result is ``100 - 100 / (1 + RS)``
    where RS is average gain divided by average loss.
    """
    smoothing = {"alpha": 1 / window, "min_periods": window}
    change = prices.diff()

    ups = change.clip(lower=0)
    # Magnitude of the down-moves (zero on up days).
    downs = change.clip(upper=0).abs()

    mean_up = ups.ewm(**smoothing).mean()
    mean_down = downs.ewm(**smoothing).mean()

    relative_strength = mean_up / mean_down
    return 100 - 100 / (1 + relative_strength)


def compute_ma(prices: pd.Series, window: int = 200) -> pd.Series:
    """Simple moving average over ``window`` rows; NaN until the window is full."""
    full_window = prices.rolling(window=window, min_periods=window)
    return full_window.mean()


# ── DCA Strategy implementations ─────────────────────────────────────────────

def dca_fixed(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 1: contribute the same ``MONTHLY_BASE`` amount every month.

    The market inputs are accepted only so the function matches the shared
    strategy signature; none of them influences the result.
    """
    contribution = MONTHLY_BASE
    return contribution


def dca_vix_scaled(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 2: size the contribution by the VIX level.

    Contribution tiers:
        VIX below 15        -> 500
        VIX from 15 to 20   -> 1000
        VIX above 20 to 30  -> 1500
        VIX above 30        -> 2000
    A missing (None or NaN) VIX falls back to ``MONTHLY_BASE``.
    """
    vix_missing = vix is None or np.isnan(vix)
    if vix_missing:
        return MONTHLY_BASE
    # Checked from the highest tier down; boundaries match the table above.
    if vix > 30:
        return 2000
    if vix > 20:
        return 1500
    if vix >= 15:
        return 1000
    return 500


def dca_ma_deviation(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 3: contribute more the further price sits below its 200-day MA.

    With ``deviation = (price - ma200) / ma200``:
        deviation >= 0      -> 500
        deviation >= -10%   -> 1000
        deviation >= -20%   -> 2000
        anything else       -> 3000
    A missing (None or NaN) or zero moving average falls back to
    ``MONTHLY_BASE``.
    """
    ma_usable = ma200 is not None and not np.isnan(ma200) and ma200 != 0
    if not ma_usable:
        return MONTHLY_BASE

    deviation = (price - ma200) / ma200  # negative when below MA
    # (lowest deviation that still earns the tier, contribution)
    tiers = ((0, 500), (-0.10, 1000), (-0.20, 2000))
    for floor, contribution in tiers:
        if deviation >= floor:
            return contribution
    return 3000


def dca_value_averaging(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 4: Value Averaging.

    Contribute the shortfall between ``target_value`` and the current
    ``portfolio_value``, never less than 0 and never more than three times
    ``MONTHLY_BASE``.
    """
    ceiling = MONTHLY_BASE * 3  # cap to avoid huge lump sums
    shortfall = target_value - portfolio_value
    capped = min(shortfall, ceiling)
    # Never sell: a portfolio ahead of target contributes nothing.
    return max(0, capped)


def dca_rsi_based(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 5: size the contribution by RSI.

    Contribution tiers:
        RSI below 30     -> 2000 (oversold)
        RSI from 30 to 70 -> 1000
        RSI above 70     -> 500 (overbought)
    A missing (None or NaN) RSI falls back to ``MONTHLY_BASE``.
    """
    rsi_missing = rsi is None or np.isnan(rsi)
    if rsi_missing:
        return MONTHLY_BASE
    if rsi > 70:
        return 500
    if rsi >= 30:
        return 1000
    return 2000


# Display name -> contribution function. Every function takes
# (date, price, vix, rsi, ma200, portfolio_value, target_value) and returns the
# dollar amount to invest on that scheduled date.
# Iteration order is the row order of the per-ticker comparison tables.
DCA_STRATEGIES = {
    "Fixed DCA": dca_fixed,
    "VIX-scaled DCA": dca_vix_scaled,
    "MA-deviation DCA": dca_ma_deviation,
    "Value Averaging": dca_value_averaging,
    "RSI-based DCA": dca_rsi_based,
}


# ── Core DCA backtest engine ─────────────────────────────────────────────────

def run_dca_backtest(
    prices: pd.Series,
    strategy_fn,
    vix: pd.Series | None = None,
    initial_capital: float = INITIAL_CAPITAL,
) -> dict:
    """
    Simulate a DCA strategy on a single ETF.

    All of ``initial_capital`` buys shares at the first price. On the first
    date of every later month (see ``monthly_schedule``) ``strategy_fn`` is
    asked for a dollar amount, which is converted into shares at that day's
    price. The simulation holds no cash and applies no transaction costs.

    Args:
        prices: Price series indexed by date. Callers in this file pass it
            with NaNs already dropped.
        strategy_fn: Callable ``(date, price, vix, rsi, ma200,
            portfolio_value, target_value) -> amount``. Negative amounts are
            floored at zero.
        vix: Optional VIX series. A date missing from its index is passed to
            the strategy as NaN.
        initial_capital: Amount invested on the first date.

    Returns:
        dict with keys ``equity`` (market value per date), ``total_invested``
        (initial capital plus all contributions), ``final_value`` (last
        equity value), ``shares`` (final share count) and
        ``invested_tracker`` (cumulative invested per date).

    Raises:
        ValueError: if ``prices`` is empty, or if its first value is not a
            positive finite number (the opening position could not be sized).
    """
    if len(prices) == 0:
        raise ValueError("prices must contain at least one observation")

    dates = prices.index
    price_values = prices.to_numpy(dtype=float)

    first_price = price_values[0]
    if not (np.isfinite(first_price) and first_price > 0):
        raise ValueError(
            f"first price must be a positive finite number, got {first_price!r}"
        )

    # Contribution dates: first date of each month, except the opening date
    # whose purchase is the initial lump sum.
    contribution_days = set(monthly_schedule(dates))
    contribution_days.discard(dates[0])

    # Indicators are read by position so they stay aligned with ``prices``.
    rsi_values = compute_rsi(prices).to_numpy()
    ma200_values = compute_ma(prices).to_numpy()

    shares = initial_capital / first_price
    total_invested = initial_capital
    months_elapsed = 0  # drives the value-averaging target

    # Plain arrays avoid a pandas setitem call on every trading day.
    equity_values = np.empty(len(dates), dtype=float)
    invested_values = np.empty(len(dates), dtype=float)

    for i, (date, price) in enumerate(zip(dates, price_values)):
        if date in contribution_days:
            months_elapsed += 1
            portfolio_value = shares * price

            # Value-averaging target: initial capital compounding at 1% per
            # month plus the cumulative baseline contributions.
            target_value = initial_capital * (1.01 ** months_elapsed)
            target_value += MONTHLY_BASE * months_elapsed

            v = vix.loc[date] if vix is not None and date in vix.index else np.nan

            amount = strategy_fn(
                date, price, v, rsi_values[i], ma200_values[i],
                portfolio_value, target_value,
            )
            amount = max(0, amount)

            # Skip the purchase when the price cannot size a position.
            if amount > 0 and price > 0:
                shares += amount / price
                total_invested += amount

        equity_values[i] = shares * price
        invested_values[i] = total_invested

    equity_curve = pd.Series(equity_values, index=dates, dtype=float)
    invested_tracker = pd.Series(invested_values, index=dates, dtype=float)
    return {
        "equity": equity_curve,
        "total_invested": total_invested,
        "final_value": equity_curve.iloc[-1],
        "shares": shares,
        "invested_tracker": invested_tracker,
    }


# ── Lump-sum benchmark ───────────────────────────────────────────────────────

def run_lump_sum(prices: pd.Series, initial_capital: float = INITIAL_CAPITAL) -> dict:
    """Benchmark: buy everything at the first price and hold.

    The capital deployed is ``initial_capital`` plus the undiscounted sum of
    one ``MONTHLY_BASE`` for every scheduled month after the first, i.e. what
    the fixed DCA strategy would invest over the same dates.
    """
    # The first scheduled month is covered by the initial capital.
    contribution_months = len(monthly_schedule(prices.index)) - 1
    deployed = initial_capital + MONTHLY_BASE * contribution_months

    position = deployed / prices.iloc[0]
    curve = position * prices
    return {
        "equity": curve,
        "total_invested": deployed,
        "final_value": curve.iloc[-1],
        "shares": position,
    }


# ── V7+VT36 baseline equity curve ────────────────────────────────────────────

def run_v7_baseline(data: pd.DataFrame) -> pd.Series:
    """Backtest the V7+VT36 strategy and return what ``backtest`` produces.

    Only the V7 universe columns that are actually present in ``data`` are
    passed on. The run starts from ``INITIAL_CAPITAL`` with a 0.001
    transaction cost and a 2.0 fixed fee.
    """
    universe = ["SPY", "TQQQ", "UPRO", "GLD", "DBC", "SHY"]
    present = []
    for symbol in universe:
        if symbol in data.columns:
            present.append(symbol)

    trend_strategy = TrendRiderV7(target_vol=0.36, min_lev=0.75)
    return backtest(
        trend_strategy,
        data[present],
        initial_capital=INITIAL_CAPITAL,
        transaction_cost=0.001,
        fixed_fee=2.0,
    )


# ── Hybrid V7 + DCA ──────────────────────────────────────────────────────────

def run_hybrid_v7_dca(
    data: pd.DataFrame,
    dca_ticker: str,
    strategy_fn,
    v7_pct: float = 0.70,
) -> dict:
    """
    Strategy 6: Hybrid — v7_pct of initial capital in V7+VT36, rest in smart DCA.

    How the two sleeves are funded:
      * V7 sleeve: ``v7_pct`` of ``INITIAL_CAPITAL`` as a lump sum. It
        receives no monthly contributions.
      * DCA sleeve: the remaining initial capital plus ALL monthly
        contributions, at the full amount ``strategy_fn`` returns (they are
        not scaled down by the sleeve weight).

    Args:
        data: Price frame with the V7 universe, ``dca_ticker`` and optionally
            a ``VIX`` column.
        dca_ticker: Column the DCA sleeve invests in.
        strategy_fn: DCA contribution function passed to ``run_dca_backtest``.
        v7_pct: Fraction of ``INITIAL_CAPITAL`` given to the V7 sleeve.

    Returns:
        dict with ``equity`` (sum of both sleeves on their shared dates),
        ``total_invested`` (``INITIAL_CAPITAL`` plus DCA contributions) and
        ``final_value`` (last combined equity value).

    Raises:
        ValueError: if ``v7_pct`` is outside [0, 1] or the two sleeves share
            no dates.
    """
    if not 0.0 <= v7_pct <= 1.0:
        raise ValueError(f"v7_pct must be within [0, 1], got {v7_pct!r}")

    dca_pct = 1.0 - v7_pct
    dca_initial = dca_pct * INITIAL_CAPITAL

    # run_v7_baseline starts from INITIAL_CAPITAL, so multiplying by v7_pct
    # rescales it to the sleeve's share.
    # NOTE(review): this assumes the V7 curve scales linearly with capital;
    # the fixed per-trade fee makes that an approximation — confirm acceptable.
    v7_sleeve = run_v7_baseline(data) * v7_pct

    prices = data[dca_ticker].dropna()
    vix = data["VIX"] if "VIX" in data.columns else None
    dca_result = run_dca_backtest(
        prices, strategy_fn, vix=vix,
        initial_capital=dca_initial,
    )
    dca_sleeve = dca_result["equity"]

    # Only dates present in both curves can be summed.
    common = v7_sleeve.index.intersection(dca_sleeve.index)
    if len(common) == 0:
        raise ValueError(
            f"V7 equity curve and {dca_ticker} DCA curve share no dates"
        )
    combined = v7_sleeve.loc[common] + dca_sleeve.loc[common]

    # Everything the DCA sleeve invested beyond its initial stake is new money.
    contributions = dca_result["total_invested"] - dca_initial
    total_invested = INITIAL_CAPITAL + contributions

    return {
        "equity": combined,
        "total_invested": total_invested,
        "final_value": combined.iloc[-1],
    }


# ── Reporting ─────────────────────────────────────────────────────────────────

def compute_metrics(result: dict, label: str) -> dict:
    """Build one comparison-table row from a backtest result.

    Returns ``{"label", "error"}`` when fewer than two non-NaN equity points
    exist. Otherwise returns the ``metrics.raw_summary`` output for the
    equity curve extended with ``label``, ``totalInvested``, ``finalValue``,
    ``profit`` and ``roiOnCapital`` (final value over total invested, minus 1).

    NOTE(review): the equity curve includes cash contributions; if
    ``raw_summary`` derives returns from equity changes, deposits will be
    counted as gains — confirm this is intended.
    """
    curve = result["equity"].dropna()
    if len(curve) < 2:
        return {"label": label, "error": "insufficient data"}

    row = metrics.raw_summary(curve)
    invested = result["total_invested"]
    final = result["final_value"]
    row.update({
        "label": label,
        "totalInvested": invested,
        "finalValue": final,
        "profit": final - invested,
        "roiOnCapital": (final / invested - 1),
    })
    return row


def print_comparison_table(rows: list[dict], title: str):
    """Print ``rows`` as a 130-column table under ``title``.

    A row containing an ``"error"`` key is printed as a one-line error
    message; every other row must carry the keys produced by
    ``compute_metrics``.
    """
    heavy_rule = "=" * 130
    column_specs = (
        ("Strategy", "<35"), ("Invested", ">12"), ("Final", ">14"),
        ("Profit", ">14"), ("ROI%", ">8"), ("Ann%", ">8"),
        ("Sharpe", ">7"), ("Sortino", ">8"), ("MaxDD%", ">8"),
        ("Calmar", ">7"),
    )

    print(f"\n{heavy_rule}")
    print(f"  {title}")
    print(heavy_rule)
    print(" ".join(format(name, spec) for name, spec in column_specs))
    print("-" * 130)

    for row in rows:
        if "error" in row:
            print(f"  {row['label']:<35} ERROR: {row['error']}")
        else:
            cells = [
                f"{row['label']:<35}",
                f"${row['totalInvested']:>11,.0f}",
                f"${row['finalValue']:>13,.0f}",
                f"${row['profit']:>13,.0f}",
                f"{row['roiOnCapital']*100:>7.1f}%",
                f"{row['annualizedReturn']*100:>7.1f}%",
                f"{row['sharpeRatio']:>7.2f}",
                f"{row['sortinoRatio']:>8.2f}",
                f"{row['maxDrawdown']*100:>7.1f}%",
                f"{row['calmarRatio']:>7.2f}",
            ]
            print(" ".join(cells))


# ── Main ──────────────────────────────────────────────────────────────────────

def _usable_prices(data: pd.DataFrame, ticker: str, warn: bool = False):
    """Return the NaN-free prices for ``ticker``, or None if missing or < 252 rows."""
    if ticker not in data.columns:
        if warn:
            print(f"\nWARNING: {ticker} not in data, skipping.")
        return None
    prices = data[ticker].dropna()
    if len(prices) < 252:
        if warn:
            print(f"\nWARNING: {ticker} has <1 year of data, skipping.")
        return None
    return prices


def _report_dca_by_ticker(data: pd.DataFrame, vix) -> dict:
    """Part 1: print one comparison table per ETF and return the raw results.

    The returned dict maps each evaluated ticker to a list of
    ``(label, raw_result)`` pairs: the DCA strategies in ``DCA_STRATEGIES``
    order followed by the lump-sum benchmark.
    """
    runs_by_ticker = {}
    for ticker in DCA_TICKERS:
        prices = _usable_prices(data, ticker, warn=True)
        if prices is None:
            continue

        lump_sum = run_lump_sum(prices)
        dca_runs = {
            name: run_dca_backtest(prices, fn, vix=vix)
            for name, fn in DCA_STRATEGIES.items()
        }

        rows = [compute_metrics(lump_sum, "Lump-sum (all day 1)")]
        rows.extend(compute_metrics(raw, name) for name, raw in dca_runs.items())
        print_comparison_table(rows, f"DCA Strategies — {ticker}")

        # Look the fixed strategy up by name instead of by table position.
        fixed = dca_runs.get("Fixed DCA")
        if fixed is not None:
            print(f"\n  Note: Fixed DCA total invested = ${fixed['total_invested']:,.0f} "
                  f"over {len(monthly_schedule(prices.index))-1} months + "
                  f"${INITIAL_CAPITAL:,.0f} initial")

        runs_by_ticker[ticker] = list(dca_runs.items()) + [("Lump-sum", lump_sum)]
    return runs_by_ticker


def _report_v7_baseline(data: pd.DataFrame):
    """Part 2: run and print the V7+VT36 baseline; return (equity, metrics)."""
    print(f"\n{'=' * 130}")
    print("  V7+VT36 TREND-FOLLOWING BASELINE (lump-sum $100K, no DCA)")
    print(f"{'=' * 130}")

    v7_eq = run_v7_baseline(data)
    v7_m = metrics.raw_summary(v7_eq)
    print(
        f"  Ann: {v7_m['annualizedReturn']*100:.1f}%, "
        f"Vol: {v7_m['annualizedVolatility']*100:.1f}%, "
        f"Sharpe: {v7_m['sharpeRatio']:.2f}, "
        f"Sortino: {v7_m['sortinoRatio']:.2f}, "
        f"MaxDD: {v7_m['maxDrawdown']*100:.1f}%, "
        f"Calmar: {v7_m['calmarRatio']:.2f}, "
        f"Final: ${v7_eq.iloc[-1]:,.0f}"
    )
    return v7_eq, v7_m


def _report_hybrids(data: pd.DataFrame, v7_eq, v7_m: dict) -> None:
    """Part 3: print the pure-V7 row next to each 70/30 hybrid combination."""
    hybrid_results = [{
        "label": "100% V7+VT36 (no DCA)",
        "totalInvested": INITIAL_CAPITAL,
        "finalValue": v7_eq.iloc[-1],
        "profit": v7_eq.iloc[-1] - INITIAL_CAPITAL,
        "roiOnCapital": v7_eq.iloc[-1] / INITIAL_CAPITAL - 1,
        **v7_m,
    }]

    smart_strategies = [("VIX-scaled", dca_vix_scaled),
                        ("MA-deviation", dca_ma_deviation),
                        ("RSI-based", dca_rsi_based)]
    for dca_ticker in ["TQQQ", "UPRO"]:
        if dca_ticker not in data.columns:
            continue
        for strat_name, strat_fn in smart_strategies:
            r = run_hybrid_v7_dca(data, dca_ticker, strat_fn, v7_pct=0.70)
            label = f"70%V7 + 30%{strat_name}->{dca_ticker}"
            hybrid_results.append(compute_metrics(r, label))

    print_comparison_table(hybrid_results, "Hybrid V7+VT36 + Smart DCA Combinations")


def _report_best_per_ticker(runs_by_ticker: dict) -> None:
    """Part 4: print the highest-final-value strategy for each evaluated ETF.

    Works from the results already computed in Part 1 instead of re-running
    every backtest.
    """
    print(f"\n{'=' * 130}")
    print("  SUMMARY: Best strategy per ETF (by final portfolio value)")
    print(f"{'=' * 130}")
    for ticker, candidates in runs_by_ticker.items():
        best_name = None
        best_final = 0
        best_invested = None
        # Strict ">" keeps the earliest candidate on ties; lump-sum comes last.
        for name, raw in candidates:
            if raw["final_value"] > best_final:
                best_final = raw["final_value"]
                best_name = name
                best_invested = raw["total_invested"]

        if best_name is None:
            # Every candidate ended at a non-positive or NaN value.
            print(f"  {ticker:<6} => no strategy produced a positive final value")
            continue

        roi = (best_final / best_invested - 1) * 100
        print(f"  {ticker:<6} => {best_name:<25} Final: ${best_final:>14,.0f}  "
              f"Invested: ${best_invested:>10,.0f}  ROI: {roi:.1f}%")


def _report_yearly(data: pd.DataFrame, vix) -> None:
    """Part 5: print year-end value and in-year change for VIX-scaled DCA.

    NOTE(review): the in-year change is computed on an equity curve that
    includes that year's contributions, and no lump-sum series is printed
    despite the section title — confirm both are intended.
    """
    print(f"\n{'=' * 130}")
    print("  YEAR-BY-YEAR: VIX-scaled DCA into TQQQ vs SPY vs Lump-sum SPY")
    print(f"{'=' * 130}")

    for ticker in ["SPY", "TQQQ"]:
        if ticker not in data.columns:
            continue
        prices = data[ticker].dropna()
        vix_dca = run_dca_backtest(prices, dca_vix_scaled, vix=vix)
        eq = vix_dca["equity"].dropna()

        print(f"\n  {ticker} — VIX-scaled DCA:")
        print(f"  {'Year':<8} {'Year-end Value':>16} {'YTD Return':>12}")
        print(f"  {'-'*40}")
        for y in sorted(set(eq.index.year)):
            year_data = eq[eq.index.year == y]
            if len(year_data) < 2:
                continue
            ytd = year_data.iloc[-1] / year_data.iloc[0] - 1
            print(f"  {y:<8} ${year_data.iloc[-1]:>15,.0f} {ytd:>11.1%}")


def main():
    """Run the full evaluation and print every report section to stdout."""
    print("=" * 80)
    print("  SMART DCA STRATEGY EVALUATION")
    print(f"  Period: {START_DATE} to {END_DATE}")
    print(f"  Initial capital: ${INITIAL_CAPITAL:,.0f}")
    print(f"  Monthly base DCA: ${MONTHLY_BASE:,.0f}")
    print("=" * 80)

    data = load_data()
    vix = data["VIX"] if "VIX" in data.columns else None
    print(f"\nData loaded: {data.shape[0]} trading days, {data.shape[1]} tickers")
    print(f"Date range: {data.index[0].strftime('%Y-%m-%d')} to {data.index[-1].strftime('%Y-%m-%d')}")

    # Part 1: DCA strategies across ETFs (results are reused by Part 4)
    runs_by_ticker = _report_dca_by_ticker(data, vix)

    # Part 2: V7+VT36 baseline
    v7_eq, v7_m = _report_v7_baseline(data)

    # Part 3: Hybrid V7 + Smart DCA
    _report_hybrids(data, v7_eq, v7_m)

    # Part 4: Best of each category summary
    _report_best_per_ticker(runs_by_ticker)

    # Part 5: Year-by-year breakdown for top strategies
    _report_yearly(data, vix)

    print(f"\n{'=' * 80}")
    print("  EVALUATION COMPLETE")
    print(f"{'=' * 80}")


if __name__ == "__main__":
    main()