"""Industry-neutral long/short momentum on the S&P 500. Strategy -------- At each rebalance date (default: monthly): 1. Compute 12-1 month momentum for every stock in the panel. 2. Group stocks by GICS sector. 3. Within each sector, rank by momentum. 4. Long the top `long_pct` (default 20%) of each sector. 5. Short the bottom `short_pct` (default 20%) of each sector. 6. Equal-weight within long-leg and short-leg, scaled so gross long = 1.0 and gross short = 1.0 → 200% gross exposure, ~0 net (β ≈ 0). The β-neutrality comes from sector-level matching: each sector contributes both long and short positions in equal $-amounts, so sector and (mostly) market exposures cancel out. Output ------ A weights DataFrame with positive (long) and negative (short) entries. PIT-safe via terminal `.shift(1)`. Costs ----- Realistic backtest of L/S requires three additional costs not present in long-only: * borrow fee on the short leg (handled by the eval script, not here) * higher slippage per turnover (this strategy churns more than V5) * dividend payment on shorts (small for SP500 ~ 1.5% × |short_w|) The strategy reports raw weights; the eval script applies costs. """ from __future__ import annotations import os import urllib.request import io import json import numpy as np import pandas as pd from strategies.base import Strategy SECTOR_CACHE = "data/us_sectors.csv" WIKIPEDIA_SP500_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies" def fetch_sp500_sectors(force: bool = False) -> pd.DataFrame: """Return a DataFrame indexed by ticker with GICS sector / sub-industry. Cached at data/us_sectors.csv. Wikipedia is the canonical source for current S&P 500 sector membership; for backtest purposes we use today's sector — sector membership is stable enough year-over-year that this introduces minimal lookahead bias for an industry-neutral strategy. """ if not force and os.path.exists(SECTOR_CACHE): df = pd.read_csv(SECTOR_CACHE, index_col=0) if "GICS Sector" in df.columns and len(df) > 100: return df print("--- Fetching S&P 500 GICS sectors from Wikipedia ---") headers = {"User-Agent": "Mozilla/5.0 (quant-backtest)"} req = urllib.request.Request(WIKIPEDIA_SP500_URL, headers=headers) with urllib.request.urlopen(req) as resp: html = resp.read().decode("utf-8") tables = pd.read_html(io.StringIO(html)) df = tables[0] df = df.rename(columns={"Symbol": "ticker"}) df["ticker"] = df["ticker"].str.replace(".", "-", regex=False) df = df.set_index("ticker") keep = [c for c in df.columns if c in ("GICS Sector", "GICS Sub-Industry", "Security")] df = df[keep] os.makedirs(os.path.dirname(SECTOR_CACHE), exist_ok=True) df.to_csv(SECTOR_CACHE) print(f"--- Cached {len(df)} sector mappings to {SECTOR_CACHE} ---") return df def _signal_mom_12_1(prices: pd.DataFrame) -> pd.DataFrame: """12-1 month cross-sectional momentum (highest = long).""" return prices.shift(21).pct_change(231) def _signal_reversal_1m(prices: pd.DataFrame) -> pd.DataFrame: """1-month reversal: highest 21-day return → SHORT (so we negate).""" return -prices.pct_change(21) def _signal_reversal_5d(prices: pd.DataFrame) -> pd.DataFrame: """Short-term 5-day reversal.""" return -prices.pct_change(5) def _signal_recovery_63(prices: pd.DataFrame) -> pd.DataFrame: """Recovery factor: price / 63d low (V-shape continuation, long-only-friendly).""" return prices / prices.rolling(63, min_periods=63).min() - 1 def _signal_low_vol(prices: pd.DataFrame) -> pd.DataFrame: """Low-vol: invert 60-day realized vol so low vol → high signal.""" rets = prices.pct_change(fill_method=None) vol = rets.rolling(60, min_periods=40).std() * np.sqrt(252) return -vol def _signal_quality_mom(prices: pd.DataFrame) -> pd.DataFrame: """Composite: 12-1 mom + consistency (% positive days over 252d) + low-vol. Combines a positive long-side selection (mom × consistency) and avoids the fragile far-tail of pure momentum by inverse-vol weighting. """ mom = prices.shift(21).pct_change(231) rets = prices.pct_change(fill_method=None) pos_days = (rets > 0).rolling(252, min_periods=126).mean() vol = rets.rolling(60, min_periods=40).std() * np.sqrt(252) mom_r = mom.rank(axis=1, pct=True, na_option="keep") cons_r = pos_days.rank(axis=1, pct=True, na_option="keep") inv_vol_r = (-vol).rank(axis=1, pct=True, na_option="keep") return 0.4 * mom_r + 0.3 * cons_r + 0.3 * inv_vol_r def _signal_mom_x_lowvol(prices: pd.DataFrame) -> pd.DataFrame: """Momentum filtered by low-vol — long winners, short LOW-vol losers. Reduces meme-stock blowups on the short leg by avoiding high-vol losers. """ mom = prices.shift(21).pct_change(231) rets = prices.pct_change(fill_method=None) vol = rets.rolling(60, min_periods=40).std() * np.sqrt(252) mom_r = mom.rank(axis=1, pct=True, na_option="keep") inv_vol_r = (-vol).rank(axis=1, pct=True, na_option="keep") return 0.5 * mom_r + 0.5 * inv_vol_r SIGNAL_REGISTRY = { "mom_12_1": _signal_mom_12_1, "reversal_1m": _signal_reversal_1m, "reversal_5d": _signal_reversal_5d, "recovery_63": _signal_recovery_63, "low_vol": _signal_low_vol, "quality_mom": _signal_quality_mom, "mom_x_lowvol": _signal_mom_x_lowvol, } class IndustryNeutralLSMomentum(Strategy): """Industry-neutral long/short portfolio with selectable signal.""" def __init__( self, rebal_freq: int = 21, mom_lookback: int = 252, mom_skip: int = 21, long_pct: float = 0.20, short_pct: float = 0.20, min_sector_size: int = 5, sector_map: pd.Series | None = None, gross_long: float = 1.0, gross_short: float = 1.0, signal_name: str = "mom_12_1", ) -> None: self.rebal_freq = rebal_freq self.mom_lookback = mom_lookback self.mom_skip = mom_skip self.long_pct = long_pct self.short_pct = short_pct self.min_sector_size = min_sector_size self.sector_map = sector_map self.gross_long = gross_long self.gross_short = gross_short if signal_name not in SIGNAL_REGISTRY: raise ValueError(f"Unknown signal: {signal_name}") self.signal_name = signal_name self.signal_func = SIGNAL_REGISTRY[signal_name] def _resolve_sector_map(self, columns: list[str]) -> pd.Series: if self.sector_map is not None: return self.sector_map.reindex(columns) df = fetch_sp500_sectors() s = df["GICS Sector"] return s.reindex(columns) def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame: cols = list(data.columns) sectors = self._resolve_sector_map(cols) mom = self.signal_func(data) weights = pd.DataFrame(0.0, index=data.index, columns=cols) warmup = self.mom_lookback + 5 # Pre-compute which rows are rebal days rebal_idx = list(range(warmup, len(data), self.rebal_freq)) rebal_set = set(rebal_idx) # Group columns by sector sector_to_cols: dict[str, list[str]] = {} for c in cols: s = sectors.get(c) if pd.isna(s): continue sector_to_cols.setdefault(s, []).append(c) for t in rebal_idx: row_mom = mom.iloc[t] longs: dict[str, float] = {} shorts: dict[str, float] = {} for sector, members in sector_to_cols.items(): ms = row_mom.reindex(members).dropna() if len(ms) < self.min_sector_size: continue n_long = max(1, int(round(len(ms) * self.long_pct))) n_short = max(1, int(round(len(ms) * self.short_pct))) ranked = ms.sort_values(ascending=False) long_picks = ranked.head(n_long).index short_picks = ranked.tail(n_short).index for sym in long_picks: longs[sym] = longs.get(sym, 0.0) + 1.0 for sym in short_picks: shorts[sym] = shorts.get(sym, 0.0) - 1.0 if not longs or not shorts: continue # Equal-weight within long leg and short leg n_l = sum(longs.values()) n_s = -sum(shorts.values()) for sym in longs: longs[sym] = self.gross_long * longs[sym] / n_l for sym in shorts: shorts[sym] = self.gross_short * shorts[sym] / n_s for sym, w in longs.items(): weights.iat[t, cols.index(sym)] = w for sym, w in shorts.items(): weights.iat[t, cols.index(sym)] = w # Forward-fill between rebal dates non_rebal_mask = pd.Series(True, index=data.index) for i in rebal_idx: non_rebal_mask.iat[i] = False weights[non_rebal_mask.values] = np.nan weights = weights.ffill().fillna(0.0) weights.iloc[:warmup] = 0.0 return weights.shift(1).fillna(0.0) __all__ = ["IndustryNeutralLSMomentum", "fetch_sp500_sectors"]