# quant/main.py

import argparse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import data_manager
import factor_attribution
import metrics
import universe_history as uh
from strategies.adaptive_momentum import AdaptiveMomentumStrategy
from strategies.buy_and_hold import BuyAndHoldStrategy
from strategies.dual_momentum import DualMomentumStrategy
from strategies.inverse_vol import InverseVolatilityStrategy
from strategies.mean_reversion import MeanReversionStrategy
from strategies.momentum import MomentumStrategy
from strategies.momentum_quality import MomentumQualityStrategy
from strategies.multi_factor import MultiFactorStrategy
from strategies.recovery_momentum import RecoveryMomentumStrategy
from strategies.trend_following import TrendFollowingStrategy
from universe import UNIVERSES


# ---------------------------------------------------------------------------
# Backtest engine
# ---------------------------------------------------------------------------

def backtest(
    strategy,
    data: pd.DataFrame,
    initial_capital: float = 100_000,
    transaction_cost: float = 0.001,
    fixed_fee: float = 0.0,
    fee_base: float = 0.0,
    fee_per_share: float = 0.0,
    open_data: pd.DataFrame | None = None,
) -> pd.Series:
    """
    Vectorized backtest.

    Parameters
    ----------
    strategy : Strategy
        Any class implementing generate_signals(data) → DataFrame of weights.
    data : pd.DataFrame
        Adjusted close prices, one column per asset.
    initial_capital : float
        Starting portfolio value.
    transaction_cost : float
        One-way cost per unit of turnover (e.g. 0.001 = 10 bps).
    fixed_fee : float
        Floor of the per-trade fee (e.g. 2.0 = $2 minimum per buy/sell).
        With fee_per_share=0 (default), this is also the actual per-trade fee.
    fee_base : float
        Fixed component of a per-share tiered fee schedule. The actual
        per-trade fee is ``max(fixed_fee, fee_base + fee_per_share * shares)``.
    fee_per_share : float
        Per-share variable component of the tiered fee (e.g. 0.009 = $0.009/share).
        With fee_base=1.88 + fee_per_share=0.009 + fixed_fee=2.0 you get an
        IBKR-style schedule: max(2, 1.88 + 0.009 * shares).
    open_data : pd.DataFrame, optional
        Open prices. When provided, enables open-to-close execution mode.

    Returns
    -------
    pd.Series
        Daily equity curve.
    """
    if open_data is not None:
        # Open-to-close mode:
        # Strategy's shift(1) on open prices gives: weights[t] = f(open_{t-1})
        # But open_t is known at morning of day t, so undo shift to get f(open_t)
        # Then execute at close_t, earning close_t → close_{t+1}
        weights = strategy.generate_signals(open_data)
        weights = weights.shift(-1).fillna(0.0)
        weights = weights.reindex(data.index).fillna(0.0)
        positions = weights

        # Returns earned: close[t] → close[t+1], weighted by positions decided at open[t]
        close_returns = data.pct_change().fillna(0.0)
        portfolio_returns = (close_returns * positions.shift(1).fillna(0.0)).sum(axis=1)
    else:
        # Classic close-to-close mode
        weights = strategy.generate_signals(data)
        weights = weights.reindex(data.index).fillna(0.0)
        positions = weights

        daily_returns = data.pct_change().fillna(0.0)
        portfolio_returns = (daily_returns * positions).sum(axis=1)

    # Turnover cost: sum of absolute weight changes each day
    turnover = positions.diff().abs().sum(axis=1).fillna(0.0)
    portfolio_returns -= turnover * transaction_cost

    # Per-trade fee. Supports both flat ($2/trade) and tiered (IBKR-style)
    # schedules: fee = max(fixed_fee, fee_base + fee_per_share * shares).
    if fixed_fee > 0 or fee_base > 0 or fee_per_share > 0:
        weight_changes = positions.diff().fillna(0.0)
        equity_running = (1 + portfolio_returns).cumprod() * initial_capital
        eq_prev = equity_running.shift(1).fillna(initial_capital)

        if fee_per_share > 0:
            # Convert per-ticker weight change into share count traded.
            # dollar_traded[i, t] = |w[i,t] - w[i,t-1]| * equity[t-1]
            # shares_traded[i, t] = dollar_traded / price[i, t]
            dollar_traded = weight_changes.abs().mul(eq_prev, axis=0)
            shares_traded = dollar_traded.div(data).replace(
                [np.inf, -np.inf], 0.0,
            ).fillna(0.0)
            per_trade_fee = (fee_base + fee_per_share * shares_traded).clip(
                lower=fixed_fee,
            )
            trade_mask = weight_changes.abs() > 1e-8
            per_trade_fee = per_trade_fee.where(trade_mask, 0.0)
            daily_fee = per_trade_fee.sum(axis=1)
        else:
            n_trades = (weight_changes.abs() > 1e-8).sum(axis=1)
            daily_fee = n_trades * fixed_fee

        fee_impact = daily_fee / eq_prev
        portfolio_returns -= fee_impact

    equity = (1 + portfolio_returns).cumprod() * initial_capital
    return equity


# ---------------------------------------------------------------------------
# Visualization
# ---------------------------------------------------------------------------

def plot_results(results: pd.DataFrame) -> None:
    """Draw equity curves above their drawdowns and show the figure.

    Parameters
    ----------
    results : pd.DataFrame
        One equity curve per column; the index is used as the x-axis.
    """
    # Drawdown = distance below the running peak, expressed in percent.
    running_peak = results.cummax()
    drawdown_pct = (results.div(running_peak) - 1) * 100

    # Stacked panels sharing the x-axis; the equity panel gets 3x the height.
    fig, axes = plt.subplots(
        2,
        1,
        figsize=(14, 9),
        sharex=True,
        gridspec_kw={"height_ratios": [3, 1]},
    )
    equity_ax, drawdown_ax = axes

    # --- Top panel: equity curves ---
    for name in results.columns:
        equity_ax.plot(results.index, results[name], label=name, linewidth=1.5)
    equity_ax.set_title("Strategy Comparison — Equity Curves", fontsize=14)
    equity_ax.set_ylabel("Portfolio Value ($)")
    equity_ax.legend(loc="upper left")
    equity_ax.grid(True, alpha=0.3)

    # --- Bottom panel: drawdowns ---
    for name in drawdown_pct.columns:
        drawdown_ax.plot(
            drawdown_pct.index, drawdown_pct[name], label=name, linewidth=1.0,
        )
    drawdown_ax.set_title("Drawdowns")
    drawdown_ax.set_ylabel("Drawdown (%)")
    drawdown_ax.set_xlabel("Date")
    drawdown_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Command-line entry point for the strategy comparison backtest.

    Parses the CLI options, loads prices for the selected market (applying the
    point-in-time membership mask for the US universe), backtests every
    configured strategy, prints performance metrics, optionally runs factor
    attribution, and finally plots the equity curves unless --no-plot is set.
    """
    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_table = [
        ("--market", {
            "choices": UNIVERSES.keys(),
            "default": "us",
            "help": "Market universe to backtest (default: us)",
        }),
        ("--capital", {
            "type": float,
            "default": None,
            "help": "Initial capital (default: 10000)",
        }),
        ("--top-n", {
            "type": int,
            "default": None,
            "help": "Number of stocks for selective strategies (default: ~10%% of universe)",
        }),
        ("--years", {
            "type": int,
            "default": None,
            "help": "Limit backtest to last N years of data",
        }),
        ("--no-plot", {
            "action": "store_true",
            "help": "Skip plotting charts",
        }),
        ("--fixed-fee", {
            "type": float,
            "default": 0.0,
            "help": "Fixed dollar cost per trade, e.g. 2.0 means $2 per buy or sell",
        }),
        ("--execution", {
            "choices": ["close", "open-close"],
            "default": "close",
            "help": "Execution mode: 'close' (default, signal & execute on close) or "
                    "'open-close' (signal on morning open, execute at close)",
        }),
        ("--attribution", {
            "action": "store_true",
            "help": "Run factor attribution after performance metrics",
        }),
        ("--attribution-model", {
            "choices": ["capm", "ff5", "ff5plus", "all"],
            "default": "all",
            "help": "Factor model selection for attribution output",
        }),
        ("--attribution-export", {
            "default": None,
            "help": "Directory to export factor attribution CSVs",
        }),
    ]
    parser = argparse.ArgumentParser(description="Run strategy backtest")
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    initial_capital = 10_000 if args.capital is None else args.capital
    use_open = args.execution == "open-close"

    universe = UNIVERSES[args.market]
    tickers = universe["fetch"]()
    benchmark = universe["benchmark"]
    benchmark_label = universe["benchmark_label"]

    # PIT universe: for the US market, also download every historical index
    # member so that the membership mask below has prices to work with.
    pit_intervals = None
    if args.market != "us":
        all_tickers = sorted(set(tickers + [benchmark]))
    else:
        pit_intervals = uh.load_sp500_history()
        historical_tickers = uh.all_tickers_ever(pit_intervals)
        all_tickers = sorted(set(tickers + historical_tickers + [benchmark]))
        print(f"--- PIT universe: {len(all_tickers)} tickers (current + historical members) ---")

    # With with_open=True the data manager returns a (close, open) pair.
    loaded = data_manager.update(args.market, all_tickers, with_open=use_open)
    if use_open:
        data, open_data = loaded
    else:
        data, open_data = loaded, None

    # Optional look-back window, measured back from the last available date.
    if args.years:
        cutoff = data.index[-1] - pd.DateOffset(years=args.years)
        data = data[data.index >= cutoff]
        if open_data is not None:
            open_data = open_data[open_data.index >= cutoff]
        print(f"--- Sliced to last {args.years} years: {data.index[0].date()} to {data.index[-1].date()} ---")

    if pit_intervals is None:
        # Keep only the requested tickers that actually came back with data.
        tickers = [t for t in tickers if t in data.columns]
    else:
        # Apply PIT mask: NaN out prices for non-member dates
        print("--- Applying PIT membership mask (survivorship-bias fix) ---")
        data = uh.mask_prices(data, pit_intervals)
        if open_data is not None:
            open_data = uh.mask_prices(open_data, pit_intervals)
        # Drop the benchmark and any column left entirely empty.
        tickers = [
            col for col in data.columns
            if col != benchmark and data[col].notna().any()
        ]
    print(f"--- Universe: {len(tickers)} stocks + {benchmark} benchmark ---")

    top_n = args.top_n or max(5, len(tickers) // 10)
    print(f"--- Selective strategies will pick top {top_n} stocks ---")
    if args.fixed_fee > 0:
        print(f"--- Fixed fee: ${args.fixed_fee:.2f} per trade ---")
    if use_open:
        print("--- Execution: open-close (signal on open, execute at close) ---")

    # Open-price inputs: stock-only slice for most strategies, the full frame
    # (benchmark included) for strategies that receive the whole universe.
    open_tickers = None if open_data is None else open_data[tickers]
    open_all = open_data

    # (label, strategy, needs_full_frame). Order here is the order in which
    # strategies are run, reported and plotted.
    lineup = [
        # --- Original strategies ---
        ("Buy & Hold (EW)", BuyAndHoldStrategy(), False),
        ("Momentum", MomentumStrategy(lookback=252, skip=21, top_n=top_n), False),
        ("Inverse Volatility", InverseVolatilityStrategy(vol_window=20), False),
        ("Multi-Factor",
         MultiFactorStrategy(tickers=tickers, benchmark=benchmark, top_n=top_n),
         True),
        # --- New strategies ---
        ("Mean Reversion", MeanReversionStrategy(top_n=top_n), False),
        ("Trend Following",
         TrendFollowingStrategy(ma_window=150, momentum_period=126, top_n=top_n),
         False),
        ("Dual Momentum", DualMomentumStrategy(top_n=top_n), False),
        ("Momentum+Quality",
         MomentumQualityStrategy(momentum_period=252, skip=21, top_n=top_n),
         False),
        ("Mom+InvVol", AdaptiveMomentumStrategy(top_n=top_n), False),
        ("Recovery+Mom Top20", RecoveryMomentumStrategy(top_n=min(20, top_n)), False),
        ("Recovery+Mom Top10", RecoveryMomentumStrategy(top_n=10), False),
    ]
    # Strategy tuples: (strategy, close_data, open_data_or_None). Each
    # stock-only strategy gets its own data[tickers] slice.
    strategies = {
        label: (
            (strat, data, open_all)
            if needs_full_frame
            else (strat, data[tickers], open_tickers)
        )
        for label, strat, needs_full_frame in lineup
    }

    results: dict[str, pd.Series] = {}
    for label, (strat, close_frame, open_frame) in strategies.items():
        print(f"\nRunning: {label}")
        results[label] = backtest(
            strat,
            close_frame,
            initial_capital=initial_capital,
            fixed_fee=args.fixed_fee,
            open_data=open_frame,
        )

    # Add benchmark (normalized to same initial capital)
    bench = data[benchmark].dropna()
    results[benchmark_label] = (bench / bench.iloc[0]) * initial_capital

    results_df = pd.DataFrame(results)

    # --- Performance metrics ---
    for label, curve in results_df.items():
        valid = curve.dropna()
        if len(valid) >= 2:
            metrics.summary(valid, name=label)
        else:
            print(f"\n  {label}: insufficient data, skipping")

    # --- Factor attribution (optional) ---
    if args.attribution:
        summary_df, loadings_df = factor_attribution.attribute_strategies(
            results_df=results_df,
            benchmark_label=benchmark_label,
            benchmark=benchmark,
            price_data=data,
            market=args.market,
            model_selection=args.attribution_model,
        )
        factor_attribution.print_attribution_summary(summary_df)
        if args.attribution_export:
            factor_attribution.export_attribution(
                summary_df, loadings_df, args.attribution_export,
            )
            print(f"Attribution CSVs written to {args.attribution_export}")

    # --- Visualization ---
    if args.no_plot:
        return
    # Only dates on which every curve has a value are plotted.
    plot_results(results_df.dropna())


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()