# quant/research/strategies_plus.py

"""
Optimization variants of RecoveryMomentumStrategy.

Four dimensions explored:
  1. Hyperparameters (top_n, recovery_window, mom_lookback, rebal_freq, weights)
  2. Regime filter: zero-out weights when SPY < MA200
  3. Weighting scheme: equal / inverse-vol / rank-weighted
  4. Ensemble: weighted blend of multiple strategies

All strategies follow the same Strategy protocol (generate_signals → weights DF).
"""

import numpy as np
import pandas as pd

from strategies.base import Strategy


# ---------------------------------------------------------------------------
# Generalized Recovery+Momentum strategy
# ---------------------------------------------------------------------------

class RecoveryMomentumPlus(Strategy):
    """
    Recovery + momentum composite with configurable blend, weighting, and
    rebalance schedule.

    Each row, every column of the price frame is scored by a blend of two
    cross-sectional percentile ranks (recovery and skip-month momentum). The
    ``top_n`` best-scoring columns are selected and weighted according to
    ``weighting``; weights are only refreshed on rebalance rows and are held
    constant in between.

    Parameters
    ----------
    recovery_window : int
        Lookback for the recovery factor (price / rolling min - 1).
    mom_lookback : int
        Long-horizon momentum window total length.
    mom_skip : int
        Short-term reversal skip for momentum. Must satisfy
        ``0 <= mom_skip < mom_lookback``.
    rebal_freq : int
        Trading-day rebalance interval.
    top_n : int
        Number of stocks selected each rebalance.
    rec_weight : float in [0, 1]
        Weight of recovery factor in composite rank blend (mom_weight = 1 - rec_weight).
    weighting : {"equal", "inv_vol", "rank"}
        Portfolio weighting scheme for the selected top_n.
    vol_window : int
        Volatility lookback when weighting="inv_vol".

    Raises
    ------
    ValueError
        If ``weighting`` is not one of the supported schemes, if any window,
        ``rebal_freq`` or ``top_n`` is smaller than 1, if ``mom_skip`` is
        outside ``[0, mom_lookback)``, or if ``rec_weight`` is outside [0, 1].
    """

    def __init__(self,
                 recovery_window: int = 63,
                 mom_lookback: int = 252,
                 mom_skip: int = 21,
                 rebal_freq: int = 21,
                 top_n: int = 10,
                 rec_weight: float = 0.5,
                 weighting: str = "equal",
                 vol_window: int = 60):
        if weighting not in ("equal", "inv_vol", "rank"):
            raise ValueError(f"weighting must be equal|inv_vol|rank, got {weighting!r}")
        for name, value in (("recovery_window", recovery_window),
                            ("mom_lookback", mom_lookback),
                            ("rebal_freq", rebal_freq),
                            ("top_n", top_n),
                            ("vol_window", vol_window)):
            if value < 1:
                raise ValueError(f"{name} must be >= 1, got {value!r}")
        # mom_skip >= mom_lookback would hand pct_change() a zero or negative
        # period; a negative period compares against *future* prices.
        if not 0 <= mom_skip < mom_lookback:
            raise ValueError(
                f"mom_skip must satisfy 0 <= mom_skip < mom_lookback, "
                f"got mom_skip={mom_skip!r}, mom_lookback={mom_lookback!r}"
            )
        if not 0.0 <= rec_weight <= 1.0:
            raise ValueError(f"rec_weight must be in [0, 1], got {rec_weight!r}")
        self.recovery_window = recovery_window
        self.mom_lookback = mom_lookback
        self.mom_skip = mom_skip
        self.rebal_freq = rebal_freq
        self.top_n = top_n
        self.rec_weight = rec_weight
        self.weighting = weighting
        self.vol_window = vol_window

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Build the weight matrix for a price frame.

        Parameters
        ----------
        data : pd.DataFrame
            Prices, one column per instrument, one row per period.
            (Assumed to be ordered oldest-to-newest; the rolling/shift logic
            below relies on row order only.)

        Returns
        -------
        pd.DataFrame
            Same shape as ``data``. Each row either sums to 1 or is all
            zeros (warm-up, too few scored names, or no usable weights).
            The frame is shifted by one row, so the weights on a given row
            are computed from data up to the previous row.
        """
        # Factors
        recovery = data / data.rolling(self.recovery_window).min() - 1
        momentum = data.shift(self.mom_skip).pct_change(self.mom_lookback - self.mom_skip)

        rec_rank = recovery.rank(axis=1, pct=True, na_option="keep")
        mom_rank = momentum.rank(axis=1, pct=True, na_option="keep")
        composite = self.rec_weight * rec_rank + (1 - self.rec_weight) * mom_rank

        # Top-N selection.
        # The composite is a sum of two percentile ranks, so exact ties are
        # common. With the default "average" tie method a tie straddling the
        # cutoff selects fewer (or more) than top_n names; "first" breaks ties
        # deterministically by column order so exactly top_n are picked.
        rank = composite.rank(axis=1, method="first", ascending=False, na_option="bottom")
        n_valid = composite.notna().sum(axis=1)
        # Rows with fewer than top_n scored names are left entirely unselected.
        enough = n_valid >= self.top_n
        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)

        # Weighting within top-N
        if self.weighting == "equal":
            raw = top_mask.astype(float)
        elif self.weighting == "rank":
            # Higher composite → higher weight within top-N
            raw = composite.where(top_mask, 0.0)
        elif self.weighting == "inv_vol":
            # Use inverse realized-volatility as weights within top-N
            rets = data.pct_change()
            vol = rets.rolling(self.vol_window).std()
            # Zero volatility would give an infinite weight; treat it as missing.
            inv_vol = 1.0 / vol.replace(0, np.nan)
            raw = inv_vol.where(top_mask, 0.0).fillna(0.0)
        else:
            # Only reachable if self.weighting was changed after construction.
            raise ValueError(
                f"weighting must be equal|inv_vol|rank, got {self.weighting!r}"
            )

        # Normalise each row to sum to 1; all-zero rows stay all-zero.
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

        # Rebalance: keep weights only on every rebal_freq-th row starting at
        # the end of the warm-up, and carry them forward in between.
        warmup = max(self.mom_lookback, self.recovery_window, self.vol_window)
        rebal_mask = pd.Series(False, index=data.index)
        rebal_mask.iloc[warmup::self.rebal_freq] = True
        signals[~rebal_mask] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0

        # One-row lag so weights never use the same row's price.
        return signals.shift(1).fillna(0.0)


# ---------------------------------------------------------------------------
# Ensemble
# ---------------------------------------------------------------------------

class EnsembleStrategy(Strategy):
    """
    Weighted blend of several sub-strategies. Each sub-strategy produces a
    weight matrix; we linearly combine them using the component weights
    normalised to sum to 1.

    When every component weight is non-negative and every sub-strategy's rows
    sum to at most 1, the blended rows also sum to at most 1. That guarantee
    does not hold if negative component weights are supplied.

    Parameters
    ----------
    components : list of (Strategy, float)
        Sub-strategies paired with their (un-normalised) blend weights.

    Raises
    ------
    ValueError
        If ``components`` is empty or the weights sum to zero.
    """

    def __init__(self, components: list[tuple[Strategy, float]]):
        components = list(components)
        # An empty ensemble would make generate_signals() return None rather
        # than a weight frame, so reject it up front.
        if not components:
            raise ValueError("EnsembleStrategy requires at least one component")
        total = sum(w for _, w in components)
        if total == 0:
            raise ValueError("EnsembleStrategy component weights must not sum to zero")
        self.components = [(s, w / total) for s, w in components]

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Run every sub-strategy on ``data`` and return the weighted sum of
        their weight matrices.

        Frames are aligned on index and columns; a cell missing from one
        frame is treated as 0 for that component.
        """
        blended = None
        for strat, w in self.components:
            sig = strat.generate_signals(data).mul(w)
            # Align columns (should be identical since same data passed)
            blended = sig if blended is None else blended.add(sig, fill_value=0.0)
        return blended


# ---------------------------------------------------------------------------
# Regime filter helper
# ---------------------------------------------------------------------------

def spy_ma200_filter(spy: pd.Series, ma_window: int = 200) -> pd.Series:
    """
    Boolean Series: True when SPY close > SPY MA(ma_window), shifted by 1 to
    avoid lookahead. Use as `regime_filter=...` in pit_backtest.backtest().

    Parameters
    ----------
    spy : pd.Series
        SPY close prices.
    ma_window : int
        Moving-average length. The average is undefined (and the flag is
        False) until ``ma_window`` observations are available.

    Returns
    -------
    pd.Series
        Same index as ``spy``. The value on each row reflects the comparison
        made on the previous row; the first row is always False.
    """
    ma = spy.rolling(ma_window, min_periods=ma_window).mean()
    # Comparisons against a NaN average are False; fillna covers nullable dtypes.
    above = (spy > ma).fillna(False)
    # Pass fill_value so the shift stays boolean. shift(1) alone inserts NaN,
    # which turns a bool Series into object dtype, and a later fillna() is not
    # guaranteed to cast it back -- leaving a mask that `~` cannot invert
    # correctly.
    return above.shift(1, fill_value=False)