# quant/research/us_combo_sweep.py

import numpy as np
import pandas as pd

from research.us_alpha_report import summarize_equity_window
from research.us_fundamentals import build_exploratory_fundamental_score
from strategies.recovery_momentum import RecoveryMomentumStrategy


# Number of rows (trading days) treated as one month: used as the lookback for
# the monthly return in the factor pack and as the default rebalance interval.
TRADING_DAYS_PER_MONTH = 21


def xsec_rank(df: pd.DataFrame, ascending: bool = True) -> pd.DataFrame:
    """Return row-wise (cross-sectional) percentile ranks of *df*.

    Each row is ranked across its columns and expressed as a fraction of the
    non-missing entries in that row. Missing values stay missing.

    Args:
        df: Frame whose rows are ranked independently.
        ascending: When True the smallest value gets the lowest rank.

    Returns:
        A frame of the same shape holding percentile ranks.
    """
    percentile_ranks = df.rank(
        axis="columns",
        pct=True,
        na_option="keep",
        ascending=ascending,
    )
    return percentile_ranks


def apply_filter_threshold(score: pd.DataFrame, filter_rank: pd.DataFrame, min_rank: float) -> pd.DataFrame:
    """Blank out scores whose filter value is below *min_rank*.

    Args:
        score: Scores to be filtered.
        filter_rank: Filter values; reindexed to the rows and columns of
            *score* before comparison.
        min_rank: Minimum filter value (inclusive) required to keep a score.

    Returns:
        A copy of *score* with NaN wherever the aligned filter value is below
        *min_rank* or missing.
    """
    on_score_grid = filter_rank.reindex(index=score.index, columns=score.columns)
    # A missing filter value compares as False, so it is filtered out as well.
    passes = on_score_grid.ge(min_rank)
    return score.where(passes)


def weighted_rank_blend(factors: dict[str, pd.DataFrame], weights: dict[str, float]) -> pd.DataFrame:
    """Combine cross-sectional factor ranks into one weighted score.

    Every factor named in *weights* is converted to row-wise percentile ranks
    via ``xsec_rank``, multiplied by its weight and summed. A cell that is
    missing in some (but not all) components contributes zero for the missing
    ones; it is NaN only when every component is missing.

    Args:
        factors: Factor frames keyed by name; must contain every key of
            *weights*.
        weights: Weight per factor name, applied in iteration order.

    Returns:
        The weighted sum divided by the total weight when that total is
        positive; otherwise the unnormalised sum (``None`` if *weights* is
        empty).
    """
    blended = None
    weight_sum = 0.0
    for factor_name in weights:
        factor_weight = weights[factor_name]
        weighted_rank = xsec_rank(factors[factor_name]) * factor_weight
        if blended is None:
            blended = weighted_rank
        else:
            # fill_value=0.0 lets a cell survive when only one side is missing.
            blended = blended.add(weighted_rank, fill_value=0.0)
        weight_sum += factor_weight

    if weight_sum > 0:
        return blended / weight_sum
    return blended


def build_price_factor_pack(close: pd.DataFrame) -> dict[str, pd.DataFrame]:
    """Compute the price-only factor frames used by the combo sweep.

    All factors are NaN wherever their lookback window is incomplete or the
    underlying prices are missing, so tickers without data never receive a
    score.

    Args:
        close: Close prices, one column per ticker, one row per trading day.

    Returns:
        A dict of factor name -> frame shaped like *close*:

        * ``recovery``: price relative to its 63-row low, minus one.
        * ``momentum_12_1``: 231-row return measured 21 rows ago.
        * ``consistency``: share of positive monthly returns over 252 rows.
        * ``inv_drawdown``: negated worst 252-row drawdown (from the 252-row
          high) seen over the last 252 rows.
        * ``low_vol``: negated 60-row standard deviation of daily returns.
        * ``dip_21``: negated 21-row return.
        * ``value_proxy``: 250-row low divided by the current price.
        * ``uptrend``: 1.0 when price is above its 150-row mean, else 0.0.
    """
    # fill_method=None keeps price gaps as NaN instead of relying on the
    # deprecated implicit forward-fill, which would turn missing history
    # (e.g. after a delisting) into fake zero returns.
    daily_ret = close.pct_change(fill_method=None)
    monthly_ret = close.pct_change(TRADING_DAYS_PER_MONTH, fill_method=None)
    rolling_max = close.rolling(252, min_periods=252).max()
    drawdown = close / rolling_max - 1.0
    trend_mean = close.rolling(150, min_periods=150).mean()

    # A NaN return compares as "not positive"; mask those cells so missing
    # data is not counted as a losing month.
    positive_month = monthly_ret.gt(0).astype(float).where(monthly_ret.notna())
    # Same idea for the trend flag: undefined until the moving average exists.
    uptrend = (close > trend_mean).astype(float).where(trend_mean.notna())

    return {
        "recovery": close / close.rolling(63, min_periods=63).min() - 1.0,
        "momentum_12_1": close.shift(21).pct_change(231, fill_method=None),
        "consistency": positive_month.rolling(252, min_periods=252).mean(),
        "inv_drawdown": -drawdown.rolling(252, min_periods=252).min(),
        "low_vol": -daily_ret.rolling(60, min_periods=60).std(),
        "dip_21": -close.pct_change(21, fill_method=None),
        "value_proxy": close.rolling(250, min_periods=250).min() / close,
        "uptrend": uptrend,
    }


def _monthly_score_weights(score: pd.DataFrame, top_n: int, rebal_freq: int = TRADING_DAYS_PER_MONTH) -> pd.DataFrame:
    """Turn a score frame into lagged, equal-weight top-N portfolio weights.

    On every ``rebal_freq``-th row, counted from the first row that has any
    score, the ``top_n`` highest-scoring columns receive equal weight; the
    weights are then held until the next rebalance row. Rows with fewer than
    ``top_n`` valid scores produce an all-zero allocation.

    Args:
        score: Scores, one row per date and one column per asset.
        top_n: Number of assets to hold.
        rebal_freq: Number of rows between rebalances.

    Returns:
        Weights indexed like the sorted *score*, shifted down by one row so a
        row's weights only use scores from earlier rows.
    """
    ordered = score.sort_index()
    has_value = ordered.notna()

    # Only rows with at least top_n valid scores may form a portfolio.
    row_is_investable = has_value.sum(axis=1) >= top_n
    positions = ordered.rank(axis=1, ascending=False, na_option="bottom", method="first")
    selected = (positions <= top_n) & row_is_investable.to_numpy().reshape(-1, 1)

    equal_units = selected.astype(float)
    holdings_count = equal_units.sum(axis=1).replace(0.0, np.nan)
    weights = equal_units.div(holdings_count, axis=0).fillna(0.0)

    # Position of the first row that carries any score (0 when there is none).
    row_has_any = has_value.any(axis=1).to_numpy()
    start = int(np.argmax(row_has_any)) if row_has_any.any() else 0

    is_rebalance = pd.Series(False, index=ordered.index)
    is_rebalance.iloc[list(range(start, len(ordered), rebal_freq))] = True

    # Erase non-rebalance rows, then carry the last rebalance forward.
    weights[~is_rebalance] = np.nan
    held = weights.ffill().fillna(0.0)
    held.iloc[:start] = 0.0

    # Lag by one row so weights never use same-row information.
    return held.shift(1).fillna(0.0)


def _backtest_from_weights(
    close: pd.DataFrame,
    weights: pd.DataFrame,
    initial_capital: float = 10_000.0,
    transaction_cost: float = 0.001,
) -> pd.Series:
    """Simulate an equity curve from close prices and target weights.

    Args:
        close: Close prices, one column per asset.
        weights: Portfolio weights; aligned to the rows and columns of
            *close*, with anything missing treated as a zero weight.
        initial_capital: Starting portfolio value.
        transaction_cost: Cost charged per unit of turnover (sum of absolute
            weight changes between consecutive rows).

    Returns:
        The portfolio value for every row of *close*.
    """
    # Align once and use the same frame for returns AND turnover. Computing
    # turnover on the raw weights misaligns the cost series whenever weights
    # cover only part of close.index, which injected NaN into the curve.
    aligned = weights.reindex(index=close.index, columns=close.columns).fillna(0.0)

    # Missing prices yield NaN returns, which are treated as flat days.
    daily_returns = close.pct_change(fill_method=None).fillna(0.0)
    gross_returns = (daily_returns * aligned).sum(axis=1)

    # The first row has no predecessor, so its turnover is zero.
    turnover = aligned.diff().abs().sum(axis=1)
    net_returns = gross_returns - turnover * transaction_cost
    return (1.0 + net_returns).cumprod() * initial_capital


def _equity_to_yearly_returns(equity: pd.Series) -> pd.Series:
    """Compute per-calendar-year returns of an equity curve.

    For each calendar year the return is the last non-missing value of that
    year divided by the first non-missing value of that year, minus one.
    Years with fewer than two non-missing observations are skipped.

    Args:
        equity: Equity curve indexed by timestamps.

    Returns:
        Returns keyed by integer year, carrying the name of *equity*. Empty
        when no year has at least two observations.
    """
    clean = equity.dropna()
    if clean.empty:
        return pd.Series(dtype=float, name=equity.name)

    rows = {}
    # Group on the year component instead of comparing against midnight
    # Jan-1/Dec-31 bounds: that comparison dropped Dec-31 observations with a
    # time-of-day and raised for timezone-aware indexes.
    for year, window in clean.groupby(clean.index.year):
        if len(window) >= 2:
            rows[int(year)] = window.iloc[-1] / window.iloc[0] - 1.0
    return pd.Series(rows, name=equity.name, dtype=float)


def _cagr(equity: pd.Series) -> float:
    """Return the compound annual growth rate of an equity curve.

    The period length is the calendar-day span between the first and last
    non-missing observations, divided by 365.25.

    Args:
        equity: Equity curve indexed by timestamps.

    Returns:
        The annualised growth rate, or NaN when there are fewer than two
        non-missing observations or the span is not positive.
    """
    clean = equity.dropna()
    # An empty or single-point curve has no growth rate; return NaN like the
    # zero-length-period branch below instead of raising IndexError.
    if len(clean) < 2:
        return np.nan
    years = (clean.index[-1] - clean.index[0]).days / 365.25
    if years <= 0:
        return np.nan
    return (clean.iloc[-1] / clean.iloc[0]) ** (1 / years) - 1


def _max_dd(equity: pd.Series) -> float:
    """Return the maximum drawdown of an equity curve.

    Args:
        equity: Equity curve; missing values are dropped first.

    Returns:
        The most negative value of ``equity / running_peak - 1`` (zero or
        below), or NaN when no observations remain.
    """
    series = equity.dropna()
    running_peak = series.cummax()
    drawdowns = series / running_peak - 1.0
    return drawdowns.min()


def _candidate_scores(price_factors: dict[str, pd.DataFrame], fundamental_score: pd.DataFrame) -> dict[str, pd.DataFrame]:
    """Build the named candidate score frames evaluated by the sweep.

    Candidates are either weighted rank blends of price factors and the
    fundamental score, or such blends filtered by the cross-sectional rank of
    the fundamental score (or by the ``uptrend`` flag for ``trend_rm_fund``).

    Args:
        price_factors: Price-based factor frames keyed by factor name.
        fundamental_score: Fundamental score frame, exposed to the blends
            under the key ``"fundamental"``.

    Returns:
        Candidate name -> score frame, in definition order.
    """
    factors = dict(price_factors)
    factors["fundamental"] = fundamental_score

    def blend(mix: dict[str, float]) -> pd.DataFrame:
        # Shorthand: every candidate blends from the same factor set.
        return weighted_rank_blend(factors, mix)

    # The fundamental rank is the filter for several candidates; rank once.
    fundamental_rank = xsec_rank(fundamental_score)
    recovery_momentum = blend({"recovery": 0.5, "momentum_12_1": 0.5})

    scores: dict[str, pd.DataFrame] = {}

    # Recovery/momentum base, kept only above a fundamental-rank cut-off.
    scores["rm_fund_filter_50"] = apply_filter_threshold(recovery_momentum, fundamental_rank, min_rank=0.50)
    scores["rm_fund_filter_70"] = apply_filter_threshold(recovery_momentum, fundamental_rank, min_rank=0.70)

    # Recovery/momentum tilted toward the fundamental score.
    scores["rm_fund_tilt_20"] = blend({"recovery": 0.4, "momentum_12_1": 0.4, "fundamental": 0.2})
    scores["rm_fund_tilt_35"] = blend({"recovery": 0.325, "momentum_12_1": 0.325, "fundamental": 0.35})

    scores["rm_quality_fund"] = blend(
        {"recovery": 0.35, "momentum_12_1": 0.35, "consistency": 0.10, "inv_drawdown": 0.10, "fundamental": 0.10}
    )
    scores["rm_quality_lowvol_fund"] = blend(
        {"recovery": 0.30, "momentum_12_1": 0.25, "consistency": 0.10, "inv_drawdown": 0.10, "low_vol": 0.10, "fundamental": 0.15}
    )
    scores["mega_quality_fund"] = blend(
        {
            "recovery": 0.20,
            "momentum_12_1": 0.20,
            "consistency": 0.15,
            "inv_drawdown": 0.15,
            "low_vol": 0.10,
            "dip_21": 0.05,
            "value_proxy": 0.05,
            "fundamental": 0.10,
        }
    )

    mega_filter_blend = blend(
        {
            "recovery": 0.25,
            "momentum_12_1": 0.20,
            "consistency": 0.10,
            "inv_drawdown": 0.10,
            "low_vol": 0.10,
            "value_proxy": 0.10,
            "fundamental": 0.15,
        }
    )
    scores["mega_filter_fund_50"] = apply_filter_threshold(mega_filter_blend, fundamental_rank, min_rank=0.50)

    # The uptrend flag is used directly as the filter value here.
    trend_blend = blend({"recovery": 0.35, "momentum_12_1": 0.35, "fundamental": 0.15, "low_vol": 0.15})
    scores["trend_rm_fund"] = apply_filter_threshold(trend_blend, price_factors["uptrend"], min_rank=0.50)

    return scores


def run_combo_backtests(
    close: pd.DataFrame,
    fundamental_score: pd.DataFrame,
    top_n: int = 10,
    transaction_cost: float = 0.001,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Backtest the baseline strategy and every candidate score.

    The ``SPY`` column, when present, is removed from the tradable universe
    and reported as a buy-and-hold series scaled to start at 10,000.

    Args:
        close: Close prices, one column per ticker (optionally incl. ``SPY``).
        fundamental_score: Fundamental scores; reindexed to the universe.
        top_n: Number of holdings per strategy.
        transaction_cost: Cost per unit of turnover passed to the backtester.

    Returns:
        ``(yearly, summary)``: per-year returns with one column per strategy,
        and a one-row-per-strategy summary sorted by ``AvgAnnual`` descending.
    """
    baseline_name = "Recovery+Mom Top10"

    benchmark_col = "SPY" if "SPY" in close.columns else None
    universe = close.drop(columns=[benchmark_col], errors="ignore").dropna(axis=1, how="all")
    aligned_fund = fundamental_score.reindex(index=universe.index, columns=universe.columns)
    factor_pack = build_price_factor_pack(universe)

    equities: dict[str, pd.Series] = {}

    # Baseline: the project's recovery + momentum strategy.
    baseline_weights = RecoveryMomentumStrategy(top_n=top_n).generate_signals(universe)
    equities[baseline_name] = _backtest_from_weights(universe, baseline_weights, transaction_cost=transaction_cost)

    # Candidates: monthly top-N portfolios built from each score frame.
    for label, score in _candidate_scores(factor_pack, aligned_fund).items():
        aligned_score = score.reindex(index=universe.index, columns=universe.columns)
        candidate_weights = _monthly_score_weights(aligned_score, top_n=top_n)
        equities[label] = _backtest_from_weights(universe, candidate_weights, transaction_cost=transaction_cost)

    if benchmark_col is not None:
        spy = close[benchmark_col].dropna()
        equities["SPY"] = (spy / spy.iloc[0]) * 10_000.0

    yearly = pd.DataFrame(
        {label: _equity_to_yearly_returns(curve) for label, curve in equities.items()}
    ).sort_index()
    baseline_yearly = yearly[baseline_name]

    records = []
    for label, curve in equities.items():
        clean = curve.dropna()
        start_value = clean.iloc[0]
        annual = yearly[label]

        # The baseline is not compared against itself.
        if label == baseline_name:
            years_beat = np.nan
        else:
            years_beat = int(annual.gt(baseline_yearly).sum())

        record = {
            "strategy": label,
            "CAGR": _cagr(curve),
            "MaxDD": _max_dd(curve),
            "TotalRet": clean.iloc[-1] / start_value - 1.0,
            "AvgAnnual": annual.mean(),
            "MedianAnnual": annual.median(),
            "YearsBeatRecovery": years_beat,
        }
        # Window CAGRs are taken from the curve rebased to its first value.
        for window in (1, 3, 5, 10):
            record[f"Win{window}Y"] = summarize_equity_window(curve / start_value, label, window)["CAGR"]
        records.append(record)

    summary = pd.DataFrame(records).sort_values("AvgAnnual", ascending=False).reset_index(drop=True)
    return yearly, summary


def load_default_inputs(data_dir: str = "data") -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load close prices and build the exploratory fundamental score.

    Args:
        data_dir: Directory containing ``us.csv``; also forwarded to the
            fundamental-score builder.

    Returns:
        ``(close, fundamental_score)`` where *close* is the full price frame
        sorted by index (``SPY`` included if present) and the score is built
        from the prices without the ``SPY`` column.
    """
    prices = pd.read_csv(f"{data_dir}/us.csv", index_col=0, parse_dates=True)
    prices = prices.sort_index()

    tradable = prices.drop(columns=["SPY"], errors="ignore")
    score = build_exploratory_fundamental_score(tradable, data_dir=data_dir)
    return prices, score


def main() -> None:
    """Run the sweep with default inputs, save the CSVs and print the tables.

    Writes the yearly returns and the summary under ``data/`` and prints both
    with return-like columns expressed in percent, rounded to two decimals.
    """
    prices, fund_score = load_default_inputs()
    yearly, summary = run_combo_backtests(prices, fund_score, top_n=10)

    yearly.to_csv("data/us_factor_combo_yearly.csv")
    summary.to_csv("data/us_factor_combo_summary.csv", index=False)

    print("=== Yearly Returns ===")
    print((yearly * 100.0).round(2).to_string())
    print("\n=== Summary ===")

    display_cols = ["strategy", "AvgAnnual", "MedianAnnual", "CAGR", "MaxDD", "YearsBeatRecovery", "Win1Y", "Win3Y", "Win5Y", "Win10Y"]
    # Fractions shown as percentages; YearsBeatRecovery is a count and is left alone.
    percent_cols = ["AvgAnnual", "MedianAnnual", "CAGR", "MaxDD", "Win1Y", "Win3Y", "Win5Y", "Win10Y"]

    table = summary[display_cols]
    table = table.assign(**{col: table[col] * 100.0 for col in percent_cols})
    print(table.round(2).to_string(index=False))


# Run the sweep only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()