# Files
# quant/research/trend_rider_v6_eval.py
# Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
# Add 28 research scripts covering DCA simulation, momentum evaluation,
# Sharpe optimization, trend rider analysis, and US fundamentals exploration.
# 2026-05-14 12:54:08 +08:00
#
# 198 lines
# 7.9 KiB
# Python

"""Evaluate TrendRiderV6 vs V5 baseline.
Run:
    uv run python -m research.trend_rider_v6_eval
"""
from __future__ import annotations
import argparse
import os
import sys
from dataclasses import asdict
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from research.permanent_yearly import load_long_stock_history, load_etfs, ETF_CACHE
from research.trend_rider_robustness import (
buy_hold_weights,
evaluate_weights,
portfolio_returns,
)
from strategies.permanent import TrendRiderV3, ETF_UNIVERSE
from strategies.trend_rider_v5 import TrendRiderV5
from strategies.trend_rider_v6 import TrendRiderV6
from strategies.factor_combo import FactorComboStrategy, SIGNAL_REGISTRY
from strategies.recovery_momentum import RecoveryMomentumStrategy
# Evaluation window bounds, kept as ISO date strings.  They are used below to
# bound every evaluation and to slice daily return series by date.
# IS / OOS presumably stand for in-sample / out-of-sample — TODO confirm.
IS_START = "2015-01-02"
IS_END = "2020-12-31"
OOS_START = "2021-01-01"
OOS_END = "2026-05-07"
def _fmt(x: float) -> str:
    """Format a fraction as a fixed-width percentage string.

    The value is multiplied by 100, rendered with two decimals right-aligned
    in a 7-character field, and suffixed with ``%``
    (e.g. ``0.1234`` -> ``"  12.34%"``).
    """
    return f"{x * 100:7.2f}%"
def print_eval(label: str, ev) -> None:
    """Print a one-line performance summary for an evaluation result.

    Args:
        label: Row label; left-aligned and padded to 42 characters.
        ev: Evaluation result exposing ``cagr``, ``volatility``, ``sharpe``,
            ``max_drawdown``, ``calmar``, ``final_multiple``, ``switches`` and
            ``avg_daily_turnover``.  In this file it is always the value
            returned by ``evaluate_weights``.
    """
    # Fractions (CAGR, vol, drawdown, turnover) are shown as percentages;
    # ``switches`` uses the ``d`` format code, so it must be integer-like.
    print(
        f" {label:<42s} "
        f"CAGR {_fmt(ev.cagr)} Vol {_fmt(ev.volatility)} "
        f"Sharpe {ev.sharpe:5.2f} MDD {_fmt(ev.max_drawdown)} "
        f"Calmar {ev.calmar:5.2f} X {ev.final_multiple:6.2f} "
        f"Sw {ev.switches:5d} Turn {ev.avg_daily_turnover*100:5.2f}%"
    )
def load_combined_panel() -> pd.DataFrame:
    """ETFs + S&P 500 stock panel anchored to SPY trading calendar.

    Returns:
        A frame indexed by the dates on which SPY has a value.  ETF columns
        come first, followed by every column of the local stock cache whose
        name does not clash with an ETF column.  Both parts are
        forward-filled after being reindexed onto the SPY dates.

    Raises:
        FileNotFoundError: If the stock cache ``data/us_long.csv`` is missing.
    """
    # ETFs
    etf_tickers = sorted(
        set(ETF_UNIVERSE) | {"SPY", "QQQ", "TQQQ", "UPRO", "GLD", "DBC", "SHY"}
    )
    etfs = load_etfs(etf_tickers, start="2013-06-01")
    # Dates on which SPY has a value define the calendar for the whole panel.
    nyse = etfs["SPY"].dropna().index

    # Stocks (large local cache: data/us_long.csv).  The path is relative, so
    # it is resolved against the current working directory.
    stock_cache = "data/us_long.csv"
    if not os.path.exists(stock_cache):
        raise FileNotFoundError(f"Missing {stock_cache} — run RecoveryMomentum once first.")
    stocks = pd.read_csv(stock_cache, index_col=0, parse_dates=True)

    # Drop any stock columns that overlap with ETF columns to avoid a clash
    # in the join below.
    overlap = set(stocks.columns) & set(etfs.columns)
    if overlap:
        stocks = stocks.drop(columns=list(overlap))

    panel = etfs.reindex(nyse).ffill()
    panel = panel.join(stocks.reindex(nyse).ffill(), how="left")
    return panel
def _build_candidates(stock_universe: list[str]) -> dict:
    """Return the ``label -> strategy`` mapping compared by ``main``.

    Insertion order is the order in which results are later printed.
    """
    candidates = {"V5 (ETF-only baseline)": TrendRiderV5()}

    # V6 regime mode: tier 2 = TQQQ, tier 1 = stocks
    regime_variants = [
        ("V6 regime_mode top5", "rec_mfilt+deep_upvol", 5),
        ("V6 regime_mode top10", "rec_mfilt+deep_upvol", 10),
        ("V6 regime_mode mom7m top10", "mom7m+rec126", 10),
        ("V6 regime_mode ma200+mom7m top10", "ma200+mom7m+rec126", 10),
    ]
    for label, signal_name, top_n in regime_variants:
        candidates[label] = TrendRiderV6(
            signal_name=signal_name, top_n=top_n, tier_mode="regime",
            stock_universe=stock_universe,
        )

    # V6 blend mode (stock picks + 50% TQQQ overlay): top 10, then the more
    # concentrated top 5.
    blend_variants = [
        ("V6 blend rec_mfilt top10 +50%TQQQ", 10),
        ("V6 blend top5 +50%TQQQ", 5),
    ]
    for label, top_n in blend_variants:
        candidates[label] = TrendRiderV6(
            signal_name="rec_mfilt+deep_upvol", top_n=top_n,
            tier2_leverage_overlay=0.50,
            stock_universe=stock_universe,
        )
    return candidates


def _print_window(title: str, weights_map: dict, panel: pd.DataFrame,
                  transaction_cost: float, start: str, end: str) -> dict:
    """Print one metrics line per strategy for ``[start, end]``.

    Returns the ``name -> evaluation`` mapping in ``weights_map`` order.
    """
    print(title)
    evals = {}
    for name, w in weights_map.items():
        ev = evaluate_weights(name, w, panel[w.columns], transaction_cost, start, end)
        print_eval(name, ev)
        evals[name] = ev
    return evals


def _print_blends(weights_map: dict, panel: pd.DataFrame,
                  transaction_cost: float) -> None:
    """Print fixed-weight V5/V6 blends and the V5-vs-V6 return correlation."""
    print("\n=== V5 + V6 BLENDS (risk-parity-ish 50/50 and 70/30) ===")
    v5_w = weights_map["V5 (ETF-only baseline)"]
    best_v6_name = "V6 regime_mode top10"
    if best_v6_name not in weights_map:
        return
    v6_w = weights_map[best_v6_name]

    # Put both weight frames on the same columns (and V5's dates) so they can
    # be combined element-wise; a missing position counts as zero weight.
    all_cols = sorted(set(v5_w.columns) | set(v6_w.columns))
    v5_a = v5_w.reindex(columns=all_cols).fillna(0.0)
    v6_a = v6_w.reindex(index=v5_a.index, columns=all_cols).fillna(0.0)

    windows = {
        "FULL": (IS_START, OOS_END),
        "IS": (IS_START, IS_END),
        "OOS": (OOS_START, OOS_END),
    }
    for w5, w6 in [(0.50, 0.50), (0.30, 0.70), (0.70, 0.30), (0.40, 0.60)]:
        blend = v5_a * w5 + v6_a * w6
        label = f"Blend V5={w5:.0%} + V6={w6:.0%}"
        for window_name, (s, e) in windows.items():
            ev = evaluate_weights(label, blend, panel[blend.columns],
                                  transaction_cost, s, e)
            print(f" [{window_name}] ", end="")
            print_eval(label, ev)

    # Correlation between V5 and V6 daily returns (full)
    v5_rets = portfolio_returns(v5_a, panel[v5_a.columns], transaction_cost)
    v6_rets = portfolio_returns(v6_a, panel[v6_a.columns], transaction_cost)
    common = v5_rets.index.intersection(v6_rets.index)
    v5_rets, v6_rets = v5_rets.loc[common], v6_rets.loc[common]
    v5_rets = v5_rets[(v5_rets.index >= IS_START) & (v5_rets.index <= OOS_END)]
    v6_rets = v6_rets[(v6_rets.index >= IS_START) & (v6_rets.index <= OOS_END)]
    corr = float(v5_rets.corr(v6_rets))
    print(f"\n V5 vs {best_v6_name} daily-return correlation = {corr:.3f}")


def _print_annual_returns(weights_map: dict, panel: pd.DataFrame,
                          transaction_cost: float) -> None:
    """Print a year-by-strategy table of compounded calendar-year returns."""
    print("\n=== Annual returns ===")
    annuals = {}
    for name, w in weights_map.items():
        rets = portfolio_returns(w, panel[w.columns], transaction_cost)
        rets = rets[(rets.index >= IS_START) & (rets.index <= OOS_END)]
        # Compound the daily returns within each calendar year.
        annuals[name] = (1.0 + rets).groupby(rets.index.year).prod() - 1.0
    annual_df = pd.DataFrame(annuals)
    annual_df = annual_df.map(lambda x: f"{x*100:6.1f}%")
    print(annual_df.to_string())


def main() -> None:
    """Evaluate the V5 baseline against several V6 configurations.

    Command-line options:
        --transaction-cost: cost passed to ``evaluate_weights`` and
            ``portfolio_returns`` for every strategy (default ``0.001``).
        --out-dir: directory receiving ``v6_eval_full.csv`` (default ``data``);
            created if missing.

    Prints metrics for the full, IS and OOS windows, SPY/QQQ buy-and-hold
    references, V5/V6 blends with their return correlation, and a table of
    annual returns.  Only the full-period metrics are written to the CSV.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--transaction-cost", type=float, default=0.001)
    parser.add_argument("--out-dir", default="data")
    args = parser.parse_args()
    os.makedirs(args.out_dir, exist_ok=True)

    panel = load_combined_panel()

    # Stock-only universe (drop ETFs from the picking universe)
    etf_set = set(ETF_UNIVERSE) | {"QQQ", "TQQQ", "UPRO", "GLD", "DBC", "SHY", "SPY",
                                   "YINN", "CHAU", "7200.HK", "7500.HK"}
    stock_universe = [c for c in panel.columns if c not in etf_set]

    # The stock count is the size of the universe actually handed to the
    # strategies, so the banner cannot disagree with what is being tested.
    print(f"Combined panel: {panel.index.min().date()} → {panel.index.max().date()}, "
          f"{panel.shape[1]} columns ({len(stock_universe)} stocks)")

    candidates = _build_candidates(stock_universe)

    print("\n=== Generating signals ===")
    weights_map = {}
    for name, strat in candidates.items():
        print(f" ... {name}")
        weights_map[name] = strat.generate_signals(panel)

    full_evals = _print_window("\n=== FULL period (2015-01 → 2026-05) ===",
                               weights_map, panel, args.transaction_cost,
                               IS_START, OOS_END)
    rows = [{**asdict(ev), "name": name} for name, ev in full_evals.items()]

    # Buy-and-hold references are evaluated without transaction costs.
    for ticker in ("SPY", "QQQ"):
        bh_w = buy_hold_weights(panel, ticker)
        print_eval(f"{ticker} B&H",
                   evaluate_weights(ticker, bh_w, panel[bh_w.columns], 0.0,
                                    IS_START, OOS_END))

    _print_window("\n=== IS (2015 → 2020) ===", weights_map, panel,
                  args.transaction_cost, IS_START, IS_END)
    _print_window("\n=== OOS (2021 → 2026-05) ===", weights_map, panel,
                  args.transaction_cost, OOS_START, OOS_END)

    # ----- V5 + V6 blends — uncorrelated alpha mixing -----
    _print_blends(weights_map, panel, args.transaction_cost)

    _print_annual_returns(weights_map, panel, args.transaction_cost)

    pd.DataFrame(rows).to_csv(os.path.join(args.out_dir, "v6_eval_full.csv"), index=False)
# Run the evaluation only when executed as a script / ``python -m`` target,
# not when the module is imported.
if __name__ == "__main__":
    main()