# quant/research/trend_rider_robustness.py

"""Robustness analysis for TrendRiderV3.

Run:
    uv run python -m research.trend_rider_robustness

The module is import-safe for tests; price loading only happens in ``main``.
"""
from __future__ import annotations

import argparse
import os
from dataclasses import asdict, dataclass
from itertools import product
from typing import Iterable

import numpy as np
import pandas as pd

from strategies.permanent import (
    ETF_UNIVERSE,
    GLOBAL_ETF_UNIVERSE,
    HK_ETF_UNIVERSE,
    PermanentV4,
    TREND_RIDER_V4_UNIVERSE,
    TrendRiderV3,
    TrendRiderV4,
)


@dataclass
class Evaluation:
    """Summary statistics for one evaluated weight schedule.

    Field order is part of the interface: rows built with
    ``dataclasses.asdict`` keep this column order in the CSV reports.
    """

    # Identification of the run and the window it covers.
    name: str  # label of the candidate / variant
    start: str  # date of the first return observation, as a string
    end: str  # date of the last return observation, as a string
    days: int  # number of return observations in the window

    # Return and risk figures.
    cagr: float  # compound annual growth rate over the window
    volatility: float  # daily standard deviation scaled by sqrt(252)
    sharpe: float  # mean / std of daily returns scaled by sqrt(252)
    max_drawdown: float  # most negative peak-to-trough drawdown (<= 0)
    calmar: float  # cagr / |max_drawdown|, 0.0 when there is no drawdown
    final_multiple: float  # ending value of one unit of starting equity

    # Trading-activity figures.
    switches: int  # days whose summed absolute weight change exceeds 0.01
    avg_daily_turnover: float  # mean of the daily absolute weight changes
    avg_gross_exposure: float  # mean of the daily summed absolute weights


def portfolio_returns(
    weights: pd.DataFrame,
    prices: pd.DataFrame,
    transaction_cost: float = 0.001,
) -> pd.Series:
    """Return the net daily portfolio return series for a weight schedule.

    Args:
        weights: Target weights, one column per asset. They are reindexed to
            the rows and columns of ``prices``; anything missing counts as 0.
        prices: Asset prices, one column per asset.
        transaction_cost: Cost charged per unit of absolute weight change.

    Returns:
        A series on ``prices.index`` holding the weighted asset return minus
        the turnover cost for each row.

    Note:
        Each row's weights are multiplied by the same row's price change; no
        lag is applied here. The first row carries no turnover cost because
        it has no previous row to difference against.
    """
    held = weights.reindex(index=prices.index, columns=prices.columns).fillna(0.0)
    # fill_method=None keeps gaps as NaN returns, which are then treated as 0.
    asset_returns = prices.pct_change(fill_method=None).fillna(0.0)

    pnl = asset_returns.mul(held).sum(axis=1)
    traded = held.diff().abs().sum(axis=1).fillna(0.0)
    costs = traded * transaction_cost
    return pnl - costs


def evaluate_weights(
    name: str,
    weights: pd.DataFrame,
    prices: pd.DataFrame,
    transaction_cost: float = 0.001,
    start: str | None = None,
    end: str | None = None,
) -> Evaluation:
    """Compute summary performance statistics for a weight schedule.

    Args:
        name: Label stored on the resulting ``Evaluation``.
        weights: Target weights, one column per asset.
        prices: Asset prices; restricted to the columns of ``weights`` and
            stripped of rows where every one of those columns is missing.
        transaction_cost: Cost per unit of absolute weight change, passed
            through to ``portfolio_returns``.
        start: Optional inclusive lower bound on the evaluation window.
        end: Optional inclusive upper bound on the evaluation window.

    Returns:
        An ``Evaluation`` describing the windowed return series.

    Raises:
        ValueError: If no return observations fall inside the window.

    Note:
        Assumes the price index is datetime-like: the year span is taken from
        the ``.days`` of the difference between the last and first labels.
    """
    prices = prices.reindex(columns=weights.columns).dropna(how="all")
    returns = portfolio_returns(weights, prices, transaction_cost=transaction_cost)

    # Turnover and exposure are derived from the full history *before* the
    # window is applied. Differencing after slicing would blank out the first
    # in-window row, hiding a rebalance on that day from ``switches`` and
    # ``avg_daily_turnover`` even though its cost is already in ``returns``.
    held = weights.reindex(returns.index).fillna(0.0)
    turnover = held.diff().abs().sum(axis=1).fillna(0.0)
    gross_exposure = held.abs().sum(axis=1)

    # One positional mask keeps the three series aligned row for row.
    in_window = np.ones(len(returns), dtype=bool)
    if start:
        in_window &= returns.index >= start
    if end:
        in_window &= returns.index <= end
    returns = returns[in_window]
    turnover = turnover[in_window]
    gross_exposure = gross_exposure[in_window]
    if returns.empty:
        raise ValueError(f"No returns available for {name}")

    equity = (1.0 + returns).cumprod()
    # Floor the span at one trading day so a single-row window cannot divide
    # by zero in the annualisation exponent.
    span_years = max((returns.index[-1] - returns.index[0]).days / 365.25, 1 / 252)
    cagr = float(equity.iloc[-1] ** (1 / span_years) - 1)

    daily_std = returns.std(ddof=1)  # NaN for a single observation
    annualiser = np.sqrt(252)
    vol = float(daily_std * annualiser) if len(returns) > 1 else 0.0
    # A NaN or zero deviation fails the comparison and yields a 0.0 Sharpe.
    sharpe = float(returns.mean() / daily_std * annualiser) if daily_std > 0 else 0.0

    drawdown = equity / equity.cummax() - 1.0
    max_dd = float(drawdown.min())

    return Evaluation(
        name=name,
        start=str(returns.index[0].date()),
        end=str(returns.index[-1].date()),
        days=int(len(returns)),
        cagr=cagr,
        volatility=vol,
        sharpe=sharpe,
        max_drawdown=max_dd,
        calmar=float(cagr / abs(max_dd)) if max_dd < 0 else 0.0,
        final_multiple=float(equity.iloc[-1]),
        switches=int((turnover > 0.01).sum()),
        avg_daily_turnover=float(turnover.mean()),
        avg_gross_exposure=float(gross_exposure.mean()),
    )


def evaluate_strategy(
    name: str,
    strategy: TrendRiderV3,
    prices: pd.DataFrame,
    transaction_cost: float = 0.001,
    start: str | None = None,
    end: str | None = None,
) -> tuple[Evaluation, pd.DataFrame]:
    """Generate a strategy's weights and evaluate them in one step.

    Args:
        name: Label stored on the resulting ``Evaluation``.
        strategy: Object whose ``generate_signals(prices)`` yields weights.
        prices: Price panel handed to the strategy.
        transaction_cost: Cost per unit of absolute weight change.
        start: Optional inclusive lower bound on the evaluation window.
        end: Optional inclusive upper bound on the evaluation window.

    Returns:
        The ``Evaluation`` together with the weights that produced it.
    """
    signal_weights = strategy.generate_signals(prices)
    # Only the assets the strategy actually allocates to are priced.
    traded_prices = prices[signal_weights.columns]
    evaluation = evaluate_weights(
        name,
        signal_weights,
        traded_prices,
        transaction_cost=transaction_cost,
        start=start,
        end=end,
    )
    return evaluation, signal_weights


def default_parameter_grid() -> list[dict]:
    """Return the default neighbourhood of strategy parameters to sweep.

    The grid is the Cartesian product of three candidate values for each of
    four keyword arguments (81 combinations), with the last-listed parameter
    varying fastest.
    """
    axes = {
        "vol_enter": [0.12, 0.14, 0.16],
        "dd_stop": [0.04, 0.05, 0.07],
        "peak_enter": [0.01, 0.02, 0.03],
        "mom_lookback": [42, 63, 84],
    }
    names = tuple(axes)
    return [dict(zip(names, combo)) for combo in product(*axes.values())]


def parameter_sweep(
    prices: pd.DataFrame,
    variants: Iterable[dict] | None = None,
    transaction_cost: float = 0.001,
    start: str | None = None,
    end: str | None = None,
) -> pd.DataFrame:
    """Evaluate ``TrendRiderV3`` under each parameter set and rank by CAGR.

    Args:
        prices: Price panel handed to every strategy instance.
        variants: Keyword-argument dicts for ``TrendRiderV3``. A falsy value
            (``None`` or an empty container) selects the default grid.
        transaction_cost: Cost per unit of absolute weight change.
        start: Optional inclusive lower bound on the evaluation window.
        end: Optional inclusive upper bound on the evaluation window.

    Returns:
        One row per variant, holding the evaluation fields plus the
        parameters used, sorted by ``cagr`` descending with a fresh index.
    """
    grid = variants or default_parameter_grid()
    records = []
    for params in grid:
        evaluation, _weights = evaluate_strategy(
            "param",
            TrendRiderV3(**params),
            prices,
            transaction_cost=transaction_cost,
            start=start,
            end=end,
        )
        # Parameters are merged last, so they win on any key collision.
        records.append({**asdict(evaluation), **params})

    table = pd.DataFrame(records)
    ranked = table.sort_values("cagr", ascending=False)
    return ranked.reset_index(drop=True)


def annual_returns(returns: pd.Series) -> pd.Series:
    """Compound a periodic return series into one return per calendar year.

    The index of ``returns`` must expose ``.year`` (a datetime-like index);
    the result is indexed by those year values.
    """
    growth_factors = returns + 1.0
    yearly_growth = growth_factors.groupby(returns.index.year).prod()
    return yearly_growth - 1.0


def buy_hold_weights(prices: pd.DataFrame, symbol: str) -> pd.DataFrame:
    """Build a single-column weight frame that holds ``symbol`` at 100%.

    The weight is 1.0 from the first row on which ``symbol`` has a price and
    0.0 before it. If ``symbol`` is not a column of ``prices``, or has no
    valid price at all, every weight stays 0.0.
    """
    allocation = pd.DataFrame(0.0, index=prices.index, columns=[symbol])
    if symbol not in prices.columns:
        return allocation

    inception = prices[symbol].first_valid_index()
    if inception is None:
        return allocation

    held = allocation.index >= inception
    allocation.loc[held, symbol] = 1.0
    return allocation


def candidate_weights(prices: pd.DataFrame) -> dict[str, pd.DataFrame]:
    """Build the weight schedules compared in the robustness report.

    Returns a mapping from candidate label to weight frame covering the
    baseline strategy, its V4 variant, two alternative risk-off baskets, two
    fixed blends with ``PermanentV4`` and two buy-and-hold benchmarks.
    """
    v3_us = TrendRiderV3().generate_signals(prices)
    v4 = TrendRiderV4().generate_signals(prices)
    with_shy = TrendRiderV3(risk_off=("GLD", "DBC", "SHY")).generate_signals(prices)
    only_shy = TrendRiderV3(risk_off=("SHY",)).generate_signals(prices)
    permanent_v4 = PermanentV4().generate_signals(prices)

    # Blends need both legs on one column set and on the baseline's rows.
    union_cols = sorted(set(v3_us.columns).union(permanent_v4.columns))
    trend_leg = v3_us.reindex(columns=union_cols).fillna(0.0)
    permanent_leg = permanent_v4.reindex(index=v3_us.index, columns=union_cols).fillna(0.0)

    def _blend(trend_share: float, permanent_share: float) -> pd.DataFrame:
        return trend_leg * trend_share + permanent_leg * permanent_share

    candidates = {
        "TrendRiderV3-US": v3_us,
        "TrendRiderV4": v4,
        "RiskOff+SHY": with_shy,
        "RiskOff=SHY": only_shy,
        "Blend75_TR25_PermanentV4": _blend(0.75, 0.25),
        "Blend50_TR50_PermanentV4": _blend(0.50, 0.50),
    }
    for ticker in ("SPY", "QQQ"):
        candidates[f"{ticker} Buy&Hold"] = buy_hold_weights(prices, ticker)
    return candidates


def load_price_panel() -> pd.DataFrame:
    """Load prices for every universe and align them to SPY's dates.

    The loader is imported inside the function so that importing this module
    does not trigger any price loading. Rows are restricted to the dates on
    which SPY has a value, and gaps in the other columns are forward-filled.
    """
    from research.permanent_yearly import load_etfs

    combined = ETF_UNIVERSE + GLOBAL_ETF_UNIVERSE + HK_ETF_UNIVERSE + TREND_RIDER_V4_UNIVERSE
    symbols = sorted(set(combined))  # de-duplicated, deterministic order
    panel = load_etfs(symbols, start="2013-06-01")

    spy_sessions = panel["SPY"].dropna().index
    aligned = panel.reindex(spy_sessions)
    return aligned.ffill()


def _format_percent_frame(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` with ``cols`` rendered as percentage strings.

    Each value is multiplied by 100 and formatted with thousands separators
    and two decimals, followed by ``%``. The input frame is not modified.
    """

    def _as_percent(value: float) -> str:
        return f"{value * 100:,.2f}%"

    formatted = df.copy()
    for column in cols:
        formatted[column] = formatted[column].map(_as_percent)
    return formatted


def main() -> None:
    """Run the robustness report from the command line.

    Loads the price panel, evaluates every candidate weight schedule, builds
    per-year returns, runs the default parameter sweep and a transaction-cost
    sensitivity pass on the baseline, writes four CSV files into ``--out-dir``
    and prints formatted tables to stdout.
    """
    parser = argparse.ArgumentParser(description="TrendRiderV3 robustness report")
    parser.add_argument("--start", default="2015-01-01")
    parser.add_argument("--end", default=None)
    parser.add_argument("--transaction-cost", type=float, default=0.001)
    parser.add_argument("--out-dir", default="data")
    args = parser.parse_args()

    prices = load_price_panel()
    # Truncate the panel itself so strategies never see data past --end.
    if args.end:
        prices = prices[prices.index <= args.end]

    print(f"ETF panel: {prices.index.min().date()} to {prices.index.max().date()} | {prices.shape[1]} columns")

    # Candidate comparison: one Evaluation row per weight schedule.
    rows = []
    weight_map = candidate_weights(prices)
    for name, weights in weight_map.items():
        rows.append(asdict(evaluate_weights(
            name,
            weights,
            prices[weights.columns],
            transaction_cost=args.transaction_cost,
            start=args.start,
            end=args.end,
        )))
    # max_drawdown is <= 0, so descending order lists the shallowest drawdown
    # first; cagr (descending) breaks ties.
    summary = pd.DataFrame(rows).sort_values(["max_drawdown", "cagr"], ascending=[False, False])

    # Calendar-year returns per candidate, restricted to the same window.
    annual_map = {}
    for name, weights in weight_map.items():
        returns = portfolio_returns(
            weights,
            prices[weights.columns],
            transaction_cost=args.transaction_cost,
        )
        returns = returns[returns.index >= args.start]
        if args.end:
            returns = returns[returns.index <= args.end]
        annual_map[name] = annual_returns(returns)
    years = pd.DataFrame(annual_map)

    # Parameter-neighbourhood sweep over the default grid.
    sweep = parameter_sweep(
        prices,
        transaction_cost=args.transaction_cost,
        start=args.start,
        end=args.end,
    )
    # Cost sensitivity: re-evaluate the baseline weights under several costs.
    cost_rows = []
    baseline_weights = weight_map["TrendRiderV3-US"]
    for cost in [0.0, 0.001, 0.002, 0.005, 0.01]:
        result = evaluate_weights(
            f"cost_{cost:.3f}",
            baseline_weights,
            prices[baseline_weights.columns],
            transaction_cost=cost,
            start=args.start,
            end=args.end,
        )
        row = asdict(result)
        row["transaction_cost"] = cost
        cost_rows.append(row)
    costs = pd.DataFrame(cost_rows)

    # Persist all four tables; the output directory is created if missing.
    os.makedirs(args.out_dir, exist_ok=True)
    summary_path = os.path.join(args.out_dir, "trend_rider_robustness_summary.csv")
    years_path = os.path.join(args.out_dir, "trend_rider_robustness_years.csv")
    sweep_path = os.path.join(args.out_dir, "trend_rider_robustness_params.csv")
    costs_path = os.path.join(args.out_dir, "trend_rider_robustness_costs.csv")
    summary.to_csv(summary_path, index=False)
    # The years table keeps its index (the calendar year) in the CSV.
    years.to_csv(years_path)
    sweep.to_csv(sweep_path, index=False)
    costs.to_csv(costs_path, index=False)

    # Console report: only the ratio-like columns are rendered as percentages.
    metric_cols = ["cagr", "volatility", "sharpe", "max_drawdown", "calmar", "final_multiple", "switches"]
    print("\nCandidate summary")
    print(_format_percent_frame(summary[["name", *metric_cols]], ["cagr", "volatility", "max_drawdown"]).to_string(index=False))

    print("\nAnnual returns")
    # Show only the headline candidates, skipping any that are absent.
    annual_cols = [c for c in ["TrendRiderV3-US", "TrendRiderV4", "SPY Buy&Hold", "QQQ Buy&Hold"] if c in years.columns]
    print(_format_percent_frame(years[annual_cols].reset_index(), annual_cols).to_string(index=False))

    # Distribution of sweep outcomes across the parameter grid.
    quant = sweep[["cagr", "max_drawdown", "sharpe", "final_multiple"]].quantile([0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0])
    print("\nParameter-neighborhood quantiles")
    print(_format_percent_frame(quant, ["cagr", "max_drawdown"]).to_string())

    print("\nCost sensitivity")
    print(_format_percent_frame(costs[["transaction_cost", "cagr", "max_drawdown", "final_multiple"]], ["transaction_cost", "cagr", "max_drawdown"]).to_string(index=False))

    print(f"\nSaved: {summary_path}")
    print(f"Saved: {years_path}")
    print(f"Saved: {sweep_path}")
    print(f"Saved: {costs_path}")


# Run the report only when executed as a script; importing stays side-effect free.
if __name__ == "__main__":
    main()