""" Smart DCA Strategy Evaluation — comprehensive comparison of DCA approaches. Tests 6 DCA strategies across 4 ETFs (SPY, QQQ, TQQQ, UPRO) over 10 years. Also tests a hybrid V7+DCA approach combining trend-following with smart DCA. Usage: cd /home/gahow/projects/quant && uv run python research/smart_dca_eval.py """ from __future__ import annotations import sys sys.path.insert(0, ".") import numpy as np import pandas as pd import data_manager import metrics from main import backtest from strategies.trend_rider_v7 import TrendRiderV7 # ── Configuration ──────────────────────────────────────────────────────────── INITIAL_CAPITAL = 100_000 MONTHLY_BASE = 1_000 START_DATE = "2016-01-01" END_DATE = "2026-05-23" DCA_TICKERS = ["SPY", "QQQ", "TQQQ", "UPRO"] # Tickers needed for V7 strategy + VIX for smart DCA EXTRA_TICKERS = ["^VIX", "GLD", "DBC", "SHY"] # ── Data Loading ───────────────────────────────────────────────────────────── def load_data() -> pd.DataFrame: """Download/update ETF price data and return close prices.""" all_tickers = DCA_TICKERS + EXTRA_TICKERS data = data_manager.update("etfs", all_tickers, with_open=False) # Trim to date range data = data.loc[START_DATE:END_DATE] # Rename ^VIX to VIX for convenience if "^VIX" in data.columns: data = data.rename(columns={"^VIX": "VIX"}) return data # ── Helper: find first trading day of each month ───────────────────────────── def monthly_schedule(dates: pd.DatetimeIndex) -> list[pd.Timestamp]: """Return the first trading day of each month within the date range.""" schedule = [] seen = set() for d in dates: key = (d.year, d.month) if key not in seen: seen.add(key) schedule.append(d) return schedule # ── Technical indicators ───────────────────────────────────────────────────── def compute_rsi(prices: pd.Series, window: int = 14) -> pd.Series: delta = prices.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(alpha=1 / window, min_periods=window).mean() avg_loss = loss.ewm(alpha=1 / window, min_periods=window).mean() rs = avg_gain / avg_loss return 100 - 100 / (1 + rs) def compute_ma(prices: pd.Series, window: int = 200) -> pd.Series: return prices.rolling(window, min_periods=window).mean() # ── DCA Strategy implementations ───────────────────────────────────────────── def dca_fixed(date, price, vix, rsi, ma200, portfolio_value, target_value): """Strategy 1: Fixed $1,000/month.""" return MONTHLY_BASE def dca_vix_scaled(date, price, vix, rsi, ma200, portfolio_value, target_value): """Strategy 2: VIX-scaled DCA.""" if vix is None or np.isnan(vix): return MONTHLY_BASE if vix < 15: return 500 elif vix <= 20: return 1000 elif vix <= 30: return 1500 else: return 2000 def dca_ma_deviation(date, price, vix, rsi, ma200, portfolio_value, target_value): """Strategy 3: MA-deviation DCA. Scale by distance below 200-day MA.""" if ma200 is None or np.isnan(ma200) or ma200 == 0: return MONTHLY_BASE deviation = (price - ma200) / ma200 # negative when below MA if deviation >= 0: return 500 elif deviation >= -0.10: return 1000 elif deviation >= -0.20: return 2000 else: return 3000 def dca_value_averaging(date, price, vix, rsi, ma200, portfolio_value, target_value): """Strategy 4: Value Averaging. Target portfolio growth of ~1% per month. Invest the difference between target and current value, floored at $0.""" diff = target_value - portfolio_value # Invest at least $0, cap at 3x base to avoid huge lump sums return max(0, min(diff, MONTHLY_BASE * 3)) def dca_rsi_based(date, price, vix, rsi, ma200, portfolio_value, target_value): """Strategy 5: RSI-based DCA. More when oversold, less when overbought.""" if rsi is None or np.isnan(rsi): return MONTHLY_BASE if rsi < 30: return 2000 elif rsi <= 70: return 1000 else: return 500 DCA_STRATEGIES = { "Fixed DCA": dca_fixed, "VIX-scaled DCA": dca_vix_scaled, "MA-deviation DCA": dca_ma_deviation, "Value Averaging": dca_value_averaging, "RSI-based DCA": dca_rsi_based, } # ── Core DCA backtest engine ───────────────────────────────────────────────── def run_dca_backtest( prices: pd.Series, strategy_fn, vix: pd.Series | None = None, initial_capital: float = INITIAL_CAPITAL, ) -> dict: """ Simulate a DCA strategy on a single ETF. Returns dict with equity curve, total invested, final value, etc. """ dates = prices.index schedule = monthly_schedule(dates) # Precompute indicators rsi_series = compute_rsi(prices) ma200_series = compute_ma(prices) # State cash = initial_capital shares = 0.0 total_invested = initial_capital # For value averaging: target grows by 1% per month from initial va_month_count = 0 equity_curve = pd.Series(index=dates, dtype=float) schedule_set = set(schedule) invested_tracker = pd.Series(index=dates, dtype=float) # Buy initial position on day 1 price_0 = prices.iloc[0] shares = cash / price_0 cash = 0.0 for i, date in enumerate(dates): price = prices.iloc[i] # DCA contribution on scheduled dates (skip the first date — already invested) if date in schedule_set and date != dates[0]: va_month_count += 1 portfolio_value = shares * price + cash # Value averaging target: initial * (1.01)^months target_value = initial_capital * (1.01 ** va_month_count) # Add cumulative expected contributions target_value += MONTHLY_BASE * va_month_count v = vix.loc[date] if vix is not None and date in vix.index else np.nan r = rsi_series.loc[date] if date in rsi_series.index else np.nan m = ma200_series.loc[date] if date in ma200_series.index else np.nan amount = strategy_fn(date, price, v, r, m, portfolio_value, target_value) amount = max(0, amount) # Buy shares with the DCA amount if amount > 0 and price > 0: new_shares = amount / price shares += new_shares total_invested += amount equity_curve.iloc[i] = shares * price invested_tracker.iloc[i] = total_invested equity_curve = equity_curve.astype(float) return { "equity": equity_curve, "total_invested": total_invested, "final_value": equity_curve.iloc[-1], "shares": shares, "invested_tracker": invested_tracker, } # ── Lump-sum benchmark ─────────────────────────────────────────────────────── def run_lump_sum(prices: pd.Series, initial_capital: float = INITIAL_CAPITAL) -> dict: """Invest all capital (initial + PV of monthly contributions) at day 1.""" dates = prices.index schedule = monthly_schedule(dates) # Total that DCA would invest: initial + $1,000 * (num_months - 1) n_months = len(schedule) - 1 # skip first month (already counted in initial) total_capital = initial_capital + MONTHLY_BASE * n_months shares = total_capital / prices.iloc[0] equity = shares * prices return { "equity": equity, "total_invested": total_capital, "final_value": equity.iloc[-1], "shares": shares, } # ── V7+VT36 baseline equity curve ──────────────────────────────────────────── def run_v7_baseline(data: pd.DataFrame) -> pd.Series: """Run V7+VT36 strategy and return equity curve.""" v7_tickers = ["SPY", "TQQQ", "UPRO", "GLD", "DBC", "SHY"] available = [t for t in v7_tickers if t in data.columns] v7_data = data[available] strategy = TrendRiderV7(target_vol=0.36, min_lev=0.75) eq = backtest(strategy, v7_data, initial_capital=INITIAL_CAPITAL, transaction_cost=0.001, fixed_fee=2.0) return eq # ── Hybrid V7 + DCA ────────────────────────────────────────────────────────── def run_hybrid_v7_dca( data: pd.DataFrame, dca_ticker: str, strategy_fn, v7_pct: float = 0.70, ) -> dict: """ Strategy 6: Hybrid — v7_pct of capital in V7+VT36, rest in smart DCA. The V7 portion gets v7_pct of initial capital and v7_pct of monthly contributions. The DCA portion gets the rest. """ dca_pct = 1.0 - v7_pct # V7 equity curve (normalized to its portion of capital) v7_eq = run_v7_baseline(data) # Scale V7 equity to its capital allocation v7_eq_scaled = v7_eq * (v7_pct * INITIAL_CAPITAL / INITIAL_CAPITAL) # DCA portion prices = data[dca_ticker].dropna() vix = data["VIX"] if "VIX" in data.columns else None dca_result = run_dca_backtest( prices, strategy_fn, vix=vix, initial_capital=dca_pct * INITIAL_CAPITAL, ) # Scale monthly contributions for DCA portion (base * dca_pct) # Already handled since dca_backtest uses MONTHLY_BASE; we need to adjust. # For simplicity, we just combine the two equity curves. # Combine: align dates common = v7_eq_scaled.index.intersection(dca_result["equity"].index) combined = v7_eq_scaled.loc[common] + dca_result["equity"].loc[common] # Total invested: V7 gets initial*v7_pct (lump sum, no DCA additions modeled in backtest()) # DCA gets initial*dca_pct + monthly contributions total_invested = INITIAL_CAPITAL + dca_result["total_invested"] - dca_pct * INITIAL_CAPITAL return { "equity": combined, "total_invested": total_invested, "final_value": combined.iloc[-1], } # ── Reporting ───────────────────────────────────────────────────────────────── def compute_metrics(result: dict, label: str) -> dict: """Compute all metrics for a DCA result.""" eq = result["equity"].dropna() if len(eq) < 2: return {"label": label, "error": "insufficient data"} m = metrics.raw_summary(eq) m["label"] = label m["totalInvested"] = result["total_invested"] m["finalValue"] = result["final_value"] m["profit"] = result["final_value"] - result["total_invested"] m["roiOnCapital"] = (result["final_value"] / result["total_invested"] - 1) return m def print_comparison_table(rows: list[dict], title: str): """Print a formatted comparison table.""" print(f"\n{'=' * 130}") print(f" {title}") print(f"{'=' * 130}") header = ( f"{'Strategy':<35} {'Invested':>12} {'Final':>14} {'Profit':>14} " f"{'ROI%':>8} {'Ann%':>8} {'Sharpe':>7} {'Sortino':>8} {'MaxDD%':>8} {'Calmar':>7}" ) print(header) print("-" * 130) for r in rows: if "error" in r: print(f" {r['label']:<35} ERROR: {r['error']}") continue print( f"{r['label']:<35} " f"${r['totalInvested']:>11,.0f} " f"${r['finalValue']:>13,.0f} " f"${r['profit']:>13,.0f} " f"{r['roiOnCapital']*100:>7.1f}% " f"{r['annualizedReturn']*100:>7.1f}% " f"{r['sharpeRatio']:>7.2f} " f"{r['sortinoRatio']:>8.2f} " f"{r['maxDrawdown']*100:>7.1f}% " f"{r['calmarRatio']:>7.2f}" ) # ── Main ────────────────────────────────────────────────────────────────────── def main(): print("=" * 80) print(" SMART DCA STRATEGY EVALUATION") print(f" Period: {START_DATE} to {END_DATE}") print(f" Initial capital: ${INITIAL_CAPITAL:,.0f}") print(f" Monthly base DCA: ${MONTHLY_BASE:,.0f}") print("=" * 80) data = load_data() vix = data["VIX"] if "VIX" in data.columns else None print(f"\nData loaded: {data.shape[0]} trading days, {data.shape[1]} tickers") print(f"Date range: {data.index[0].strftime('%Y-%m-%d')} to {data.index[-1].strftime('%Y-%m-%d')}") # ── Part 1: DCA strategies across ETFs ──────────────────────────────── for ticker in DCA_TICKERS: if ticker not in data.columns: print(f"\nWARNING: {ticker} not in data, skipping.") continue prices = data[ticker].dropna() if len(prices) < 252: print(f"\nWARNING: {ticker} has <1 year of data, skipping.") continue results = [] # Lump-sum benchmark ls = run_lump_sum(prices) results.append(compute_metrics(ls, "Lump-sum (all day 1)")) # Each DCA strategy for name, fn in DCA_STRATEGIES.items(): r = run_dca_backtest(prices, fn, vix=vix) results.append(compute_metrics(r, name)) print_comparison_table(results, f"DCA Strategies — {ticker}") # Print DCA investment summary print(f"\n Note: Fixed DCA total invested = ${results[1]['totalInvested']:,.0f} " f"over {len(monthly_schedule(prices.index))-1} months + " f"${INITIAL_CAPITAL:,.0f} initial") # ── Part 2: V7+VT36 baseline ───────────────────────────────────────── print(f"\n{'=' * 130}") print(" V7+VT36 TREND-FOLLOWING BASELINE (lump-sum $100K, no DCA)") print(f"{'=' * 130}") v7_eq = run_v7_baseline(data) v7_m = metrics.raw_summary(v7_eq) print( f" Ann: {v7_m['annualizedReturn']*100:.1f}%, " f"Vol: {v7_m['annualizedVolatility']*100:.1f}%, " f"Sharpe: {v7_m['sharpeRatio']:.2f}, " f"Sortino: {v7_m['sortinoRatio']:.2f}, " f"MaxDD: {v7_m['maxDrawdown']*100:.1f}%, " f"Calmar: {v7_m['calmarRatio']:.2f}, " f"Final: ${v7_eq.iloc[-1]:,.0f}" ) # ── Part 3: Hybrid V7 + Smart DCA ──────────────────────────────────── hybrid_results = [] # 100% V7 baseline for comparison hybrid_results.append({ "label": "100% V7+VT36 (no DCA)", "totalInvested": INITIAL_CAPITAL, "finalValue": v7_eq.iloc[-1], "profit": v7_eq.iloc[-1] - INITIAL_CAPITAL, "roiOnCapital": v7_eq.iloc[-1] / INITIAL_CAPITAL - 1, **v7_m, }) # Hybrid: 70% V7 + 30% VIX-scaled DCA into each leveraged ETF for dca_ticker in ["TQQQ", "UPRO"]: if dca_ticker not in data.columns: continue for strat_name, strat_fn in [("VIX-scaled", dca_vix_scaled), ("MA-deviation", dca_ma_deviation), ("RSI-based", dca_rsi_based)]: r = run_hybrid_v7_dca(data, dca_ticker, strat_fn, v7_pct=0.70) label = f"70%V7 + 30%{strat_name}->{dca_ticker}" hybrid_results.append(compute_metrics(r, label)) print_comparison_table(hybrid_results, "Hybrid V7+VT36 + Smart DCA Combinations") # ── Part 4: Best of each category summary ───────────────────────────── print(f"\n{'=' * 130}") print(" SUMMARY: Best strategy per ETF (by final portfolio value)") print(f"{'=' * 130}") for ticker in DCA_TICKERS: if ticker not in data.columns: continue prices = data[ticker].dropna() if len(prices) < 252: continue best_name = None best_final = 0 for name, fn in DCA_STRATEGIES.items(): r = run_dca_backtest(prices, fn, vix=vix) if r["final_value"] > best_final: best_final = r["final_value"] best_name = name best_invested = r["total_invested"] ls = run_lump_sum(prices) ls_label = "Lump-sum" if ls["final_value"] > best_final: best_final = ls["final_value"] best_name = ls_label best_invested = ls["total_invested"] roi = (best_final / best_invested - 1) * 100 print(f" {ticker:<6} => {best_name:<25} Final: ${best_final:>14,.0f} " f"Invested: ${best_invested:>10,.0f} ROI: {roi:.1f}%") # ── Part 5: Year-by-year breakdown for top strategies ───────────────── print(f"\n{'=' * 130}") print(" YEAR-BY-YEAR: VIX-scaled DCA into TQQQ vs SPY vs Lump-sum SPY") print(f"{'=' * 130}") for ticker in ["SPY", "TQQQ"]: if ticker not in data.columns: continue prices = data[ticker].dropna() vix_dca = run_dca_backtest(prices, dca_vix_scaled, vix=vix) eq = vix_dca["equity"].dropna() print(f"\n {ticker} — VIX-scaled DCA:") print(f" {'Year':<8} {'Year-end Value':>16} {'YTD Return':>12}") print(f" {'-'*40}") years = sorted(set(eq.index.year)) for y in years: year_data = eq[eq.index.year == y] if len(year_data) < 2: continue ytd = year_data.iloc[-1] / year_data.iloc[0] - 1 print(f" {y:<8} ${year_data.iloc[-1]:>15,.0f} {ytd:>11.1%}") print(f"\n{'=' * 80}") print(" EVALUATION COMPLETE") print(f"{'=' * 80}") if __name__ == "__main__": main()