Files
quant/research/smart_dca_eval.py
Gahow Wang 0d983edfc0 research: individual stock swing, new frameworks, literature alpha, DCA
Four research directions beyond V7+VT36:

1. single_stock_swing: 20 famous stocks (Mag 7 + others), per-stock
   optimized swing trading. High-vol growth stocks (AMD Sharpe 1.55,
   TSLA 1.54) work best, but overfitting risk is extreme — with
   universal params, only TSLA is viable. Not competitive with V7.

2. v7_literature_alpha: 9 academic directions (VIX overlay, Kelly
   sizing, multi-MA, cross-asset, momentum acceleration, VIX mean-
   reversion, vol-adaptive PT, combined). V3's regime engine already
   implicitly captures most literature signals. MA130 marginally
   better than MA150 (+0.02 Sharpe, within noise).

3. new_frameworks_eval: volatility trading (SVXY risk-off) and
   calendar effects (turn-of-month). SVXY and V7 regime structurally
   conflict — SVXY crashes exactly when V7 goes risk-off.
   Turn-of-month has decent Sharpe (1.30) but only 28% annual.
   Nothing beats V7.

4. smart_dca_eval: fixed/VIX-scaled/MA-deviation/value-averaging/RSI
   DCA into SPY/QQQ/TQQQ/UPRO + V7 hybrids. Smart DCA barely beats
   fixed DCA. Any DCA hybrid dilutes V7's alpha. DCA only useful for
   new monthly contributions that can't lump-sum into V7.

Conclusion: V7+VT36 remains SOTA across all tested frameworks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-23 00:45:44 +08:00

494 lines
18 KiB
Python

"""
Smart DCA Strategy Evaluation — comprehensive comparison of DCA approaches.
Tests 6 DCA strategies across 4 ETFs (SPY, QQQ, TQQQ, UPRO) over 10 years.
Also tests a hybrid V7+DCA approach combining trend-following with smart DCA.
Usage: cd /home/gahow/projects/quant && uv run python research/smart_dca_eval.py
"""
from __future__ import annotations
import sys
sys.path.insert(0, ".")
import numpy as np
import pandas as pd
import data_manager
import metrics
from main import backtest
from strategies.trend_rider_v7 import TrendRiderV7
# ── Configuration ────────────────────────────────────────────────────────────
# Lump sum deployed on the first trading day of each backtest.
INITIAL_CAPITAL = 100_000
# Baseline monthly contribution: returned as-is by dca_fixed and used as the
# fallback by the other sizing rules when their indicator is unavailable.
MONTHLY_BASE = 1_000
# Bounds used to slice the loaded price frame (label slicing, both ends kept).
START_DATE = "2016-01-01"
END_DATE = "2026-05-23"
# ETFs on which every DCA sizing rule is evaluated.
DCA_TICKERS = ["SPY", "QQQ", "TQQQ", "UPRO"]
# Tickers needed for V7 strategy + VIX for smart DCA
EXTRA_TICKERS = ["^VIX", "GLD", "DBC", "SHY"]
# ── Data Loading ─────────────────────────────────────────────────────────────
def load_data() -> pd.DataFrame:
    """Fetch close prices for every ticker the evaluation needs.

    The DCA tickers and the extra tickers are requested together from
    ``data_manager.update`` (dataset ``"etfs"``, without open prices), the
    result is cut down to ``START_DATE``..``END_DATE``, and the ``^VIX``
    column, when present, is exposed under the friendlier name ``VIX``.
    """
    tickers = [*DCA_TICKERS, *EXTRA_TICKERS]
    raw = data_manager.update("etfs", tickers, with_open=False)
    window = raw.loc[START_DATE:END_DATE]
    if "^VIX" not in window.columns:
        return window
    return window.rename(columns={"^VIX": "VIX"})
# ── Helper: find first trading day of each month ─────────────────────────────
def monthly_schedule(dates: pd.DatetimeIndex) -> list[pd.Timestamp]:
    """Return one date per calendar month: the first one encountered.

    ``dates`` is walked in its own order and the first entry seen for each
    (year, month) pair is kept, so for a chronologically sorted trading
    calendar the result is the first trading day of every month.
    """
    first_seen = {}
    for day in dates:
        # setdefault keeps the earliest entry and ignores later ones; dicts
        # preserve insertion order, so months come out in encounter order.
        first_seen.setdefault((day.year, day.month), day)
    return list(first_seen.values())
# ── Technical indicators ─────────────────────────────────────────────────────
def compute_rsi(prices: pd.Series, window: int = 14) -> pd.Series:
    """Relative Strength Index of ``prices`` on a 0-100 scale.

    Up-moves and down-moves are separated from the one-step price change,
    each is smoothed with an exponentially weighted mean using
    ``alpha = 1 / window``, and the ratio of the two is mapped to
    ``100 - 100 / (1 + ratio)``. Values are NaN until ``window`` price
    changes have been observed.
    """
    def _smooth(series: pd.Series) -> pd.Series:
        return series.ewm(alpha=1 / window, min_periods=window).mean()

    change = prices.diff()
    # clip keeps the leading NaN produced by diff(), so it does not count
    # towards min_periods.
    mean_up = _smooth(change.clip(lower=0))
    mean_down = _smooth((-change).clip(lower=0))
    strength = mean_up / mean_down
    return 100 - 100 / (1 + strength)
def compute_ma(prices: pd.Series, window: int = 200) -> pd.Series:
    """Simple moving average over ``window`` observations.

    The first ``window - 1`` entries are NaN because a full window is
    required before a value is produced.
    """
    full_windows = prices.rolling(window=window, min_periods=window)
    return full_windows.mean()
# ── DCA Strategy implementations ─────────────────────────────────────────────
def dca_fixed(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 1: contribute the flat base amount every month.

    The market inputs are accepted only so the signature matches the other
    sizing rules; none of them is read.
    """
    contribution = MONTHLY_BASE
    return contribution
def dca_vix_scaled(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 2: size the contribution by the VIX level.

    Tiers: below 15 -> 500, 15 to 20 -> 1000, above 20 up to 30 -> 1500,
    above 30 -> 2000. A missing or NaN VIX falls back to ``MONTHLY_BASE``.
    Only ``vix`` is read; the other arguments exist for signature parity.
    """
    if vix is None or np.isnan(vix):
        return MONTHLY_BASE
    # Guard clauses from calm to stressed markets; the first match wins.
    if vix < 15:
        return 500
    if vix <= 20:
        return 1000
    if vix <= 30:
        return 1500
    return 2000
def dca_ma_deviation(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 3: buy more the further ``price`` sits below its 200-day MA.

    With ``gap = (price - ma200) / ma200``: at or above the MA -> 500,
    down to 10% below -> 1000, down to 20% below -> 2000, deeper -> 3000.
    A missing, NaN or zero ``ma200`` falls back to ``MONTHLY_BASE``.
    """
    if ma200 is None or np.isnan(ma200) or ma200 == 0:
        return MONTHLY_BASE
    gap = (price - ma200) / ma200  # negative when price is under the MA
    if gap >= 0:
        return 500
    if gap >= -0.10:
        return 1000
    if gap >= -0.20:
        return 2000
    return 3000
def dca_value_averaging(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 4: value averaging towards a caller-supplied target.

    Contributes the gap between ``target_value`` and ``portfolio_value``,
    never less than 0 (no selling) and never more than three times
    ``MONTHLY_BASE`` (to avoid huge single-month lump sums).
    """
    shortfall = target_value - portfolio_value
    ceiling = MONTHLY_BASE * 3
    capped = min(shortfall, ceiling)
    return max(0, capped)
def dca_rsi_based(date, price, vix, rsi, ma200, portfolio_value, target_value):
    """Strategy 5: contribute more when oversold, less when overbought.

    RSI below 30 -> 2000, 30 to 70 inclusive -> 1000, above 70 -> 500.
    A missing or NaN RSI falls back to ``MONTHLY_BASE``.
    """
    if rsi is None or np.isnan(rsi):
        return MONTHLY_BASE
    if rsi < 30:
        return 2000
    if rsi > 70:
        return 500
    # Neutral band: 30 <= rsi <= 70.
    return 1000
# Registry of the standalone DCA sizing rules, keyed by the display label used
# in the comparison tables. Every callable takes the same seven positional
# arguments (date, price, vix, rsi, ma200, portfolio_value, target_value) and
# returns the amount to contribute on that scheduled date.
DCA_STRATEGIES = {
"Fixed DCA": dca_fixed,
"VIX-scaled DCA": dca_vix_scaled,
"MA-deviation DCA": dca_ma_deviation,
"Value Averaging": dca_value_averaging,
"RSI-based DCA": dca_rsi_based,
}
# ── Core DCA backtest engine ─────────────────────────────────────────────────
def run_dca_backtest(
    prices: pd.Series,
    strategy_fn,
    vix: pd.Series | None = None,
    initial_capital: float = INITIAL_CAPITAL,
) -> dict:
    """Simulate one DCA sizing rule on a single price series.

    ``initial_capital`` is fully invested at the first price. On the first
    date of every later month (per ``monthly_schedule``) ``strategy_fn`` is
    asked how much to add, and that amount buys shares at the same day's
    price. Negative amounts are floored to zero, so nothing is ever sold.

    Args:
        prices: Price series indexed by date; must be non-empty.
        strategy_fn: Callable ``(date, price, vix, rsi, ma200,
            portfolio_value, target_value) -> amount``.
        vix: Optional VIX series; dates missing from it are passed as NaN.
        initial_capital: Amount invested on the first date.

    Returns:
        Dict with ``equity`` (daily market value of the holdings),
        ``total_invested`` (initial capital plus all contributions),
        ``final_value``, ``shares`` and ``invested_tracker`` (cumulative
        invested amount per day).

    Note:
        ``equity`` steps up on contribution dates because new money is added;
        it is not a contribution-adjusted return series.
    """
    dates = prices.index
    first_day = dates[0]
    contribution_days = set(monthly_schedule(dates))

    # Indicators are computed once on the same series that is being traded.
    rsi_series = compute_rsi(prices)
    ma200_series = compute_ma(prices)

    def _value_on(series, day):
        """Value of ``series`` at ``day``, or NaN when unavailable."""
        if series is not None and day in series.index:
            return series.loc[day]
        return np.nan

    # All starting capital is deployed immediately; no cash is held afterwards.
    shares = initial_capital / prices.iloc[0]
    total_invested = initial_capital
    months_elapsed = 0  # number of contribution dates processed so far

    equity_values = []
    invested_values = []
    for day, price in zip(dates, prices.to_numpy()):
        # The very first date is skipped: it is covered by the initial buy.
        if day in contribution_days and day != first_day:
            months_elapsed += 1
            portfolio_value = shares * price
            # Value-averaging target: initial capital compounding at 1% per
            # month plus the cumulative base contributions expected so far.
            target_value = initial_capital * (1.01 ** months_elapsed)
            target_value += MONTHLY_BASE * months_elapsed

            amount = strategy_fn(
                day,
                price,
                _value_on(vix, day),
                _value_on(rsi_series, day),
                _value_on(ma200_series, day),
                portfolio_value,
                target_value,
            )
            amount = max(0, amount)
            if amount > 0 and price > 0:
                shares += amount / price
                total_invested += amount

        equity_values.append(shares * price)
        invested_values.append(total_invested)

    equity_curve = pd.Series(equity_values, index=dates, dtype=float)
    invested_tracker = pd.Series(invested_values, index=dates, dtype=float)
    return {
        "equity": equity_curve,
        "total_invested": total_invested,
        "final_value": equity_curve.iloc[-1],
        "shares": shares,
        "invested_tracker": invested_tracker,
    }
# ── Lump-sum benchmark ───────────────────────────────────────────────────────
def run_lump_sum(prices: pd.Series, initial_capital: float = INITIAL_CAPITAL) -> dict:
    """Benchmark: invest everything a fixed-DCA investor would put in, on day 1.

    The deployed amount is ``initial_capital`` plus ``MONTHLY_BASE`` for every
    scheduled month after the first. It is the plain nominal sum of those
    contributions; no discounting is applied.

    Returns:
        Dict with ``equity`` (shares times price per day), ``total_invested``,
        ``final_value`` and ``shares``.
    """
    # The first scheduled month is already covered by the initial capital.
    contribution_months = len(monthly_schedule(prices.index)) - 1
    total_capital = initial_capital + MONTHLY_BASE * contribution_months
    shares = total_capital / prices.iloc[0]
    equity = shares * prices
    return {
        "equity": equity,
        "total_invested": total_capital,
        "final_value": equity.iloc[-1],
        "shares": shares,
    }
# ── V7+VT36 baseline equity curve ────────────────────────────────────────────
def run_v7_baseline(data: pd.DataFrame) -> pd.Series:
    """Backtest the V7+VT36 strategy and return whatever ``backtest`` yields.

    Only the V7 universe columns that are actually present in ``data`` are
    passed on. The strategy is built with ``target_vol=0.36`` and
    ``min_lev=0.75``, and the backtest starts from ``INITIAL_CAPITAL`` with a
    0.001 transaction cost and a 2.0 fixed fee.
    """
    universe = ("SPY", "TQQQ", "UPRO", "GLD", "DBC", "SHY")
    present = [ticker for ticker in universe if ticker in data.columns]
    return backtest(
        TrendRiderV7(target_vol=0.36, min_lev=0.75),
        data[present],
        initial_capital=INITIAL_CAPITAL,
        transaction_cost=0.001,
        fixed_fee=2.0,
    )
# ── Hybrid V7 + DCA ──────────────────────────────────────────────────────────
def run_hybrid_v7_dca(
    data: pd.DataFrame,
    dca_ticker: str,
    strategy_fn,
    v7_pct: float = 0.70,
) -> dict:
    """Strategy 6: hybrid of a V7+VT36 sleeve and a smart-DCA sleeve.

    What is actually modelled:

    * The V7 sleeve is the V7+VT36 equity curve scaled by ``v7_pct``. It is a
      pure lump sum; it receives no monthly contributions.
    * The DCA sleeve starts with ``(1 - v7_pct) * INITIAL_CAPITAL`` and
      receives the full, unscaled monthly amounts chosen by ``strategy_fn``.
      The monthly contributions are therefore NOT split by ``v7_pct``.

    Args:
        data: Price frame holding the V7 universe, ``dca_ticker`` and,
            optionally, a ``VIX`` column.
        dca_ticker: Column that the DCA sleeve buys.
        strategy_fn: DCA sizing rule forwarded to ``run_dca_backtest``.
        v7_pct: Share of the initial capital given to V7, within [0, 1].

    Returns:
        Dict with ``equity`` (sum of both sleeves on their common dates),
        ``total_invested`` (full initial capital plus the DCA sleeve's
        contributions) and ``final_value``.

    Raises:
        ValueError: If ``v7_pct`` lies outside [0, 1].
    """
    if not 0.0 <= v7_pct <= 1.0:
        raise ValueError(f"v7_pct must be within [0, 1], got {v7_pct!r}")
    dca_pct = 1.0 - v7_pct

    # The V7 backtest starts from INITIAL_CAPITAL, so multiplying its equity
    # by v7_pct gives the curve of a sleeve funded with that share.
    v7_eq_scaled = run_v7_baseline(data) * v7_pct

    prices = data[dca_ticker].dropna()
    vix = data["VIX"] if "VIX" in data.columns else None
    dca_initial = dca_pct * INITIAL_CAPITAL
    dca_result = run_dca_backtest(
        prices, strategy_fn, vix=vix,
        initial_capital=dca_initial,
    )

    # The sleeves may cover different dates (the DCA series drops NaNs), so
    # only dates present in both are summed.
    common = v7_eq_scaled.index.intersection(dca_result["equity"].index)
    combined = v7_eq_scaled.loc[common] + dca_result["equity"].loc[common]

    # Full initial capital plus the contributions the DCA sleeve added on top
    # of its own starting allocation.
    total_invested = INITIAL_CAPITAL + dca_result["total_invested"] - dca_initial
    return {
        "equity": combined,
        "total_invested": total_invested,
        "final_value": combined.iloc[-1],
    }
# ── Reporting ─────────────────────────────────────────────────────────────────
def compute_metrics(result: dict, label: str) -> dict:
    """Turn a backtest result into one row for the comparison table.

    The NaN-free equity curve is summarised by ``metrics.raw_summary``; the
    row is then extended with the label, invested capital, final value,
    profit and return on invested capital. Curves with fewer than two points
    yield an error row instead.

    NOTE(review): the equity curve of a DCA run includes the contributed
    cash, so any return-based figure from ``raw_summary`` presumably counts
    contributions as gains. Confirm against ``metrics.raw_summary``.
    """
    curve = result["equity"].dropna()
    if len(curve) < 2:
        return {"label": label, "error": "insufficient data"}

    row = metrics.raw_summary(curve)
    invested = result["total_invested"]
    final = result["final_value"]
    row.update(
        label=label,
        totalInvested=invested,
        finalValue=final,
        profit=final - invested,
        roiOnCapital=(final / invested - 1),
    )
    return row
def print_comparison_table(rows: list[dict], title: str):
    """Print ``rows`` as a fixed-width comparison table under ``title``.

    Each row is either a metrics dict (``label``, ``totalInvested``,
    ``finalValue``, ``profit``, ``roiOnCapital``, ``annualizedReturn``,
    ``sharpeRatio``, ``sortinoRatio``, ``maxDrawdown``, ``calmarRatio``) or
    an error dict with ``label`` and ``error``, which is printed on a single
    line of its own.
    """
    rule = "=" * 130
    columns = [
        ("Strategy", "<35"),
        ("Invested", ">12"),
        ("Final", ">14"),
        ("Profit", ">14"),
        ("ROI%", ">8"),
        ("Ann%", ">8"),
        ("Sharpe", ">7"),
        ("Sortino", ">8"),
        ("MaxDD%", ">8"),
        ("Calmar", ">7"),
    ]

    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
    print(" ".join(format(name, spec) for name, spec in columns))
    print("-" * 130)

    for row in rows:
        if "error" in row:
            print(f" {row['label']:<35} ERROR: {row['error']}")
        else:
            # The '$' and '%' signs take one character of each column's
            # width, which is why the numeric widths are one less than the
            # corresponding header widths.
            cells = [
                f"{row['label']:<35}",
                f"${row['totalInvested']:>11,.0f}",
                f"${row['finalValue']:>13,.0f}",
                f"${row['profit']:>13,.0f}",
                f"{row['roiOnCapital']*100:>7.1f}%",
                f"{row['annualizedReturn']*100:>7.1f}%",
                f"{row['sharpeRatio']:>7.2f}",
                f"{row['sortinoRatio']:>8.2f}",
                f"{row['maxDrawdown']*100:>7.1f}%",
                f"{row['calmarRatio']:>7.2f}",
            ]
            print(" ".join(cells))
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Run the full evaluation and print every report section.

    Sections, in order:
      1. each DCA sizing rule plus the lump-sum benchmark, per ETF;
      2. the V7+VT36 lump-sum baseline;
      3. hybrids of V7+VT36 with smart DCA into the leveraged ETFs;
      4. the best approach per ETF by final portfolio value;
      5. a year-by-year view of VIX-scaled DCA into SPY and TQQQ.

    The backtests from section 1 are kept and reused by section 4 rather than
    being run a second time.
    """
    print("=" * 80)
    print(" SMART DCA STRATEGY EVALUATION")
    print(f" Period: {START_DATE} to {END_DATE}")
    print(f" Initial capital: ${INITIAL_CAPITAL:,.0f}")
    print(f" Monthly base DCA: ${MONTHLY_BASE:,.0f}")
    print("=" * 80)

    data = load_data()
    vix = data["VIX"] if "VIX" in data.columns else None
    print(f"\nData loaded: {data.shape[0]} trading days, {data.shape[1]} tickers")
    print(f"Date range: {data.index[0].strftime('%Y-%m-%d')} to {data.index[-1].strftime('%Y-%m-%d')}")

    # ── Part 1: DCA strategies across ETFs ────────────────────────────────
    # ticker -> {"lump": lump-sum result, "dca": {strategy name: result}}
    runs_by_ticker: dict[str, dict] = {}
    for ticker in DCA_TICKERS:
        if ticker not in data.columns:
            print(f"\nWARNING: {ticker} not in data, skipping.")
            continue
        prices = data[ticker].dropna()
        if len(prices) < 252:
            print(f"\nWARNING: {ticker} has <1 year of data, skipping.")
            continue

        lump = run_lump_sum(prices)
        dca_runs = {
            name: run_dca_backtest(prices, fn, vix=vix)
            for name, fn in DCA_STRATEGIES.items()
        }
        runs_by_ticker[ticker] = {"lump": lump, "dca": dca_runs}

        results = [compute_metrics(lump, "Lump-sum (all day 1)")]
        results.extend(compute_metrics(run, name) for name, run in dca_runs.items())
        print_comparison_table(results, f"DCA Strategies — {ticker}")

        # Looked up by name rather than by table position, so the note stays
        # correct if the order of DCA_STRATEGIES changes.
        fixed_invested = dca_runs["Fixed DCA"]["total_invested"]
        print(f"\n Note: Fixed DCA total invested = ${fixed_invested:,.0f} "
              f"over {len(monthly_schedule(prices.index))-1} months + "
              f"${INITIAL_CAPITAL:,.0f} initial")

    # ── Part 2: V7+VT36 baseline ─────────────────────────────────────────
    print(f"\n{'=' * 130}")
    print(" V7+VT36 TREND-FOLLOWING BASELINE (lump-sum $100K, no DCA)")
    print(f"{'=' * 130}")
    v7_eq = run_v7_baseline(data)
    v7_m = metrics.raw_summary(v7_eq)
    print(
        f" Ann: {v7_m['annualizedReturn']*100:.1f}%, "
        f"Vol: {v7_m['annualizedVolatility']*100:.1f}%, "
        f"Sharpe: {v7_m['sharpeRatio']:.2f}, "
        f"Sortino: {v7_m['sortinoRatio']:.2f}, "
        f"MaxDD: {v7_m['maxDrawdown']*100:.1f}%, "
        f"Calmar: {v7_m['calmarRatio']:.2f}, "
        f"Final: ${v7_eq.iloc[-1]:,.0f}"
    )

    # ── Part 3: Hybrid V7 + Smart DCA ────────────────────────────────────
    # 100% V7 baseline row for comparison.
    hybrid_results = [{
        "label": "100% V7+VT36 (no DCA)",
        "totalInvested": INITIAL_CAPITAL,
        "finalValue": v7_eq.iloc[-1],
        "profit": v7_eq.iloc[-1] - INITIAL_CAPITAL,
        "roiOnCapital": v7_eq.iloc[-1] / INITIAL_CAPITAL - 1,
        **v7_m,
    }]
    # One source of truth for the split, so the label cannot drift from the
    # value handed to run_hybrid_v7_dca.
    hybrid_v7_pct = 0.70
    split_label = f"{hybrid_v7_pct:.0%}V7 + {1 - hybrid_v7_pct:.0%}"
    smart_rules = [
        ("VIX-scaled", dca_vix_scaled),
        ("MA-deviation", dca_ma_deviation),
        ("RSI-based", dca_rsi_based),
    ]
    for dca_ticker in ["TQQQ", "UPRO"]:
        if dca_ticker not in data.columns:
            continue
        for strat_name, strat_fn in smart_rules:
            # NOTE: every call re-runs the V7 backtest internally.
            r = run_hybrid_v7_dca(data, dca_ticker, strat_fn, v7_pct=hybrid_v7_pct)
            label = f"{split_label}{strat_name}->{dca_ticker}"
            hybrid_results.append(compute_metrics(r, label))
    print_comparison_table(hybrid_results, "Hybrid V7+VT36 + Smart DCA Combinations")

    # ── Part 4: Best of each category summary ─────────────────────────────
    print(f"\n{'=' * 130}")
    print(" SUMMARY: Best strategy per ETF (by final portfolio value)")
    print(f"{'=' * 130}")
    for ticker, runs in runs_by_ticker.items():
        # DCA rules first, lump sum last: max() keeps the first maximal entry,
        # so lump sum wins only when it is strictly better.
        candidates = [*runs["dca"].items(), ("Lump-sum", runs["lump"])]
        best_name, best_run = max(candidates, key=lambda item: item[1]["final_value"])
        best_final = best_run["final_value"]
        best_invested = best_run["total_invested"]
        roi = (best_final / best_invested - 1) * 100
        print(f" {ticker:<6} => {best_name:<25} Final: ${best_final:>14,.0f} "
              f"Invested: ${best_invested:>10,.0f} ROI: {roi:.1f}%")

    # ── Part 5: Year-by-year breakdown for top strategies ─────────────────
    print(f"\n{'=' * 130}")
    print(" YEAR-BY-YEAR: VIX-scaled DCA into SPY and TQQQ")
    print(f"{'=' * 130}")
    for ticker in ["SPY", "TQQQ"]:
        if ticker not in data.columns:
            continue
        prices = data[ticker].dropna()
        vix_dca = run_dca_backtest(prices, dca_vix_scaled, vix=vix)
        eq = vix_dca["equity"].dropna()
        print(f"\n {ticker} — VIX-scaled DCA:")
        print(f" {'Year':<8} {'Year-end Value':>16} {'YTD Return':>12}")
        print(f" {'-'*40}")
        years = sorted(set(eq.index.year))
        for y in years:
            year_data = eq[eq.index.year == y]
            if len(year_data) < 2:
                continue
            # First-to-last change in account value within the year; this
            # includes the money contributed during the year.
            ytd = year_data.iloc[-1] / year_data.iloc[0] - 1
            print(f" {y:<8} ${year_data.iloc[-1]:>15,.0f} {ytd:>11.1%}")

    print(f"\n{'=' * 80}")
    print(" EVALUATION COMPLETE")
    print(f"{'=' * 80}")


if __name__ == "__main__":
    main()