Add 12 strategy modules including adaptive blend, composite alpha, cross-asset momentum, ensemble alpha, trend rider v5/v6, and more.
252 lines
9.1 KiB
Python
252 lines
9.1 KiB
Python
"""Industry-neutral long/short momentum on the S&P 500.
|
||
|
||
Strategy
|
||
--------
|
||
At each rebalance date (default: monthly):
|
||
1. Compute 12-1 month momentum for every stock in the panel.
|
||
2. Group stocks by GICS sector.
|
||
3. Within each sector, rank by momentum.
|
||
4. Long the top `long_pct` (default 20%) of each sector.
|
||
5. Short the bottom `short_pct` (default 20%) of each sector.
|
||
6. Equal-weight within long-leg and short-leg, scaled so gross long = 1.0
|
||
and gross short = 1.0 → 200% gross exposure, ~0 net (β ≈ 0).
|
||
|
||
The β-neutrality comes from sector-level matching: each sector contributes
|
||
both long and short positions in equal $-amounts, so sector and (mostly)
|
||
market exposures cancel out.
|
||
|
||
Output
|
||
------
|
||
A weights DataFrame with positive (long) and negative (short) entries.
|
||
PIT-safe via terminal `.shift(1)`.
|
||
|
||
Costs
|
||
-----
|
||
Realistic backtest of L/S requires three additional costs not present in
|
||
long-only:
|
||
* borrow fee on the short leg (handled by the eval script, not here)
|
||
* higher slippage per turnover (this strategy churns more than V5)
|
||
* dividend payment on shorts (small for SP500 ~ 1.5% × |short_w|)
|
||
The strategy reports raw weights; the eval script applies costs.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import urllib.request
|
||
import io
|
||
import json
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
from strategies.base import Strategy
|
||
|
||
|
||
# Relative path of the on-disk CSV cache read and written by fetch_sp500_sectors().
SECTOR_CACHE = "data/us_sectors.csv"
|
||
# Page whose HTML tables are parsed for the ticker -> GICS sector mapping.
WIKIPEDIA_SP500_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
|
||
|
||
|
||
def fetch_sp500_sectors(force: bool = False, timeout: float = 30.0) -> pd.DataFrame:
    """Return a DataFrame indexed by ticker with GICS sector / sub-industry.

    Cached at data/us_sectors.csv. Wikipedia is the canonical source for
    current S&P 500 sector membership; for backtest purposes we use today's
    sector — sector membership is stable enough year-over-year that this
    introduces minimal lookahead bias for an industry-neutral strategy.

    Args:
        force: When True, ignore any existing cache file and re-download.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        DataFrame indexed by ticker (dots replaced with dashes) holding
        whichever of "GICS Sector", "GICS Sub-Industry" and "Security" the
        source table provides.

    Raises:
        KeyError: If no table on the page has both a "Symbol" and a
            "GICS Sector" column.
    """
    if not force and os.path.exists(SECTOR_CACHE):
        cached = pd.read_csv(SECTOR_CACHE, index_col=0)
        # A tiny or column-less cache is treated as stale: fall through and refetch.
        if "GICS Sector" in cached.columns and len(cached) > 100:
            return cached

    print("--- Fetching S&P 500 GICS sectors from Wikipedia ---")
    headers = {"User-Agent": "Mozilla/5.0 (quant-backtest)"}
    req = urllib.request.Request(WIKIPEDIA_SP500_URL, headers=headers)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        html = resp.read().decode("utf-8")
    tables = pd.read_html(io.StringIO(html))

    # Pick the first table that actually looks like the constituents list
    # instead of trusting position 0, so a page-layout change fails loudly
    # rather than caching an unrelated table.
    required = {"Symbol", "GICS Sector"}
    df = next((t for t in tables if required.issubset(set(t.columns))), None)
    if df is None:
        raise KeyError(
            f"No table with columns {sorted(required)} found at {WIKIPEDIA_SP500_URL}"
        )

    df = df.rename(columns={"Symbol": "ticker"})
    # Class-share tickers are listed with a dot (e.g. BRK.B); use the dash form.
    df["ticker"] = df["ticker"].str.replace(".", "-", regex=False)
    df = df.set_index("ticker")
    keep = [c for c in df.columns if c in ("GICS Sector", "GICS Sub-Industry",
                                           "Security")]
    df = df[keep]

    # os.makedirs("") raises, so only create a directory when the path has one.
    cache_dir = os.path.dirname(SECTOR_CACHE)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    df.to_csv(SECTOR_CACHE)
    print(f"--- Cached {len(df)} sector mappings to {SECTOR_CACHE} ---")
    return df
|
||
|
||
|
||
def _signal_mom_12_1(prices: pd.DataFrame) -> pd.DataFrame:
    """12-1 month cross-sectional momentum (highest = long).

    For each row ``t`` the value is ``price[t-21] / price[t-252] - 1``: the
    231-row return ending 21 rows ago. The first 252 rows are NaN.

    ``fill_method=None`` disables pandas' implicit forward-fill so a missing
    price yields a NaN signal instead of a return computed from a stale
    quote; this matches the other signals in this module that already pass
    ``fill_method=None``.
    """
    return prices.shift(21).pct_change(231, fill_method=None)
|
||
|
||
|
||
def _signal_reversal_1m(prices: pd.DataFrame) -> pd.DataFrame:
    """1-month reversal: highest 21-day return → SHORT (so we negate).

    Returns the negated 21-row percentage change of ``prices``; the first 21
    rows are NaN. ``fill_method=None`` keeps gaps as NaN rather than letting
    pandas forward-fill a stale price into the return.
    """
    return -prices.pct_change(21, fill_method=None)
|
||
|
||
|
||
def _signal_reversal_5d(prices: pd.DataFrame) -> pd.DataFrame:
    """Short-term 5-day reversal.

    Returns the negated 5-row percentage change of ``prices`` (recent losers
    score highest); the first 5 rows are NaN. ``fill_method=None`` keeps gaps
    as NaN rather than letting pandas forward-fill a stale price.
    """
    return -prices.pct_change(5, fill_method=None)
|
||
|
||
|
||
def _signal_recovery_63(prices: pd.DataFrame) -> pd.DataFrame:
    """Recovery factor: how far price sits above its trailing 63-row low.

    Value is ``price / rolling_min(price, 63) - 1``. A full 63-row window is
    required, so the first 62 rows are NaN.
    """
    window = 63
    trailing_low = prices.rolling(window, min_periods=window).min()
    return prices / trailing_low - 1
|
||
|
||
|
||
def _signal_low_vol(prices: pd.DataFrame) -> pd.DataFrame:
    """Low-vol signal: negated annualised 60-row realised volatility.

    Volatility is the rolling standard deviation of 1-row returns (at least
    40 observations in the 60-row window) scaled by sqrt(252). Negating it
    makes the calmest names score highest.
    """
    daily_returns = prices.pct_change(fill_method=None)
    rolling_std = daily_returns.rolling(60, min_periods=40).std()
    annualised_vol = rolling_std * np.sqrt(252)
    return -annualised_vol
|
||
|
||
|
||
def _signal_quality_mom(prices: pd.DataFrame) -> pd.DataFrame:
    """Composite: 12-1 mom + consistency (% positive days over 252d) + low-vol.

    Each component is converted to a cross-sectional percentile rank (per
    row, NaNs kept as NaN) and blended 40% momentum / 30% consistency /
    30% inverse volatility. A NaN in any component makes the composite NaN.

    Both return calculations pass ``fill_method=None`` so the momentum leg
    treats missing prices the same way as the daily-return leg instead of
    silently forward-filling them.
    """
    mom = prices.shift(21).pct_change(231, fill_method=None)
    rets = prices.pct_change(fill_method=None)
    # NaN > 0 is False, so a missing return counts as a non-positive day.
    pos_days = (rets > 0).rolling(252, min_periods=126).mean()
    vol = rets.rolling(60, min_periods=40).std() * np.sqrt(252)

    mom_r = mom.rank(axis=1, pct=True, na_option="keep")
    cons_r = pos_days.rank(axis=1, pct=True, na_option="keep")
    # Rank the negated vol so the least volatile name gets the highest rank.
    inv_vol_r = (-vol).rank(axis=1, pct=True, na_option="keep")
    return 0.4 * mom_r + 0.3 * cons_r + 0.3 * inv_vol_r
|
||
|
||
|
||
def _signal_mom_x_lowvol(prices: pd.DataFrame) -> pd.DataFrame:
    """Equal blend of 12-1 momentum rank and inverse-volatility rank.

    Each component is a per-row percentile rank (NaNs kept), averaged 50/50.
    The top of the composite is therefore high-momentum, low-vol names and
    the bottom is low-momentum, high-vol names.

    NOTE(review): the earlier description of this signal said the short leg
    targets LOW-vol losers to avoid meme-stock blowups, but with this formula
    a high-vol loser scores lowest and is shorted first. The formula is left
    unchanged here — confirm which behaviour is intended.

    Both return calculations pass ``fill_method=None`` so missing prices
    produce NaN rather than being forward-filled in the momentum leg only.
    """
    mom = prices.shift(21).pct_change(231, fill_method=None)
    rets = prices.pct_change(fill_method=None)
    vol = rets.rolling(60, min_periods=40).std() * np.sqrt(252)
    mom_r = mom.rank(axis=1, pct=True, na_option="keep")
    # Rank the negated vol so the least volatile name gets the highest rank.
    inv_vol_r = (-vol).rank(axis=1, pct=True, na_option="keep")
    return 0.5 * mom_r + 0.5 * inv_vol_r
|
||
|
||
|
||
# Lookup from a signal's public name to the function that builds it from a
# price panel. Keys are the accepted values of the strategy's `signal_name`.
SIGNAL_REGISTRY = dict(
    mom_12_1=_signal_mom_12_1,
    reversal_1m=_signal_reversal_1m,
    reversal_5d=_signal_reversal_5d,
    recovery_63=_signal_recovery_63,
    low_vol=_signal_low_vol,
    quality_mom=_signal_quality_mom,
    mom_x_lowvol=_signal_mom_x_lowvol,
)
|
||
|
||
|
||
class IndustryNeutralLSMomentum(Strategy):
    """Industry-neutral long/short portfolio with selectable signal.

    At every rebalance row the chosen signal is ranked within each sector;
    the top slice of each sector is bought and the bottom slice sold. All
    long picks share ``gross_long`` equally and all short picks share
    ``gross_short`` equally. Weights are held between rebalances and lagged
    by one row before being returned.
    """

    def __init__(
        self,
        rebal_freq: int = 21,
        mom_lookback: int = 252,
        mom_skip: int = 21,
        long_pct: float = 0.20,
        short_pct: float = 0.20,
        min_sector_size: int = 5,
        sector_map: pd.Series | None = None,
        gross_long: float = 1.0,
        gross_short: float = 1.0,
        signal_name: str = "mom_12_1",
    ) -> None:
        """Store the configuration and resolve the signal function.

        Args:
            rebal_freq: Number of rows between rebalances.
            mom_lookback: Used only to size the warmup (``mom_lookback + 5``
                rows); the signal functions use their own fixed windows.
            mom_skip: Stored but not read anywhere in this class.
            long_pct: Fraction of each sector's ranked names to buy.
            short_pct: Fraction of each sector's ranked names to sell.
            min_sector_size: Sectors with fewer non-NaN signals than this are
                skipped at that rebalance.
            sector_map: Optional ticker -> sector Series. When None the
                mapping is loaded via ``fetch_sp500_sectors()``.
            gross_long: Total weight of the long leg.
            gross_short: Total absolute weight of the short leg.
            signal_name: Key into ``SIGNAL_REGISTRY``.

        Raises:
            ValueError: If ``signal_name`` is not a registered signal.
        """
        if signal_name not in SIGNAL_REGISTRY:
            raise ValueError(f"Unknown signal: {signal_name}")
        self.rebal_freq = rebal_freq
        self.mom_lookback = mom_lookback
        self.mom_skip = mom_skip
        self.long_pct = long_pct
        self.short_pct = short_pct
        self.min_sector_size = min_sector_size
        self.sector_map = sector_map
        self.gross_long = gross_long
        self.gross_short = gross_short
        self.signal_name = signal_name
        self.signal_func = SIGNAL_REGISTRY[signal_name]

    def _resolve_sector_map(self, columns: list[str]) -> pd.Series:
        """Return the sector label for each of ``columns`` (NaN if unknown).

        Uses the user-supplied ``sector_map`` when present; otherwise falls
        back to the "GICS Sector" column from ``fetch_sp500_sectors()``.
        """
        if self.sector_map is not None:
            return self.sector_map.reindex(columns)
        df = fetch_sp500_sectors()
        return df["GICS Sector"].reindex(columns)

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Build the lagged target-weight panel for ``data``.

        Args:
            data: Price panel, one column per ticker, one row per period.

        Returns:
            DataFrame shaped like ``data`` with positive weights for longs
            and negative weights for shorts. Rows before the first rebalance
            are zero. If no sector produces both a long and a short pick at
            a rebalance, the book is flat until the next rebalance. The
            result is shifted by one row so a weight computed from row ``t``
            first applies at row ``t + 1``.
        """
        cols = list(data.columns)
        sectors = self._resolve_sector_map(cols)
        signal = self.signal_func(data)

        n_rows = len(data)
        warmup = self.mom_lookback + 5
        # Column name -> position, built once instead of list.index per pick.
        col_pos = {c: i for i, c in enumerate(cols)}

        # Group columns by sector; tickers with no sector label are ignored.
        sector_to_cols: dict[str, list[str]] = {}
        for c in cols:
            sector = sectors.get(c)
            if pd.isna(sector):
                continue
            sector_to_cols.setdefault(sector, []).append(c)

        # NaN marks "no decision on this row"; only rebalance rows are filled,
        # so a forward-fill afterwards carries each book to the next rebalance.
        target = np.full((n_rows, len(cols)), np.nan)

        for t in range(warmup, n_rows, self.rebal_freq):
            target[t, :] = 0.0
            row_signal = signal.iloc[t]
            long_syms: list[str] = []
            short_syms: list[str] = []

            for members in sector_to_cols.values():
                ms = row_signal.reindex(members).dropna()
                n = len(ms)
                if n < self.min_sector_size:
                    continue
                n_long = max(1, int(round(n * self.long_pct)))
                n_short = max(1, int(round(n * self.short_pct)))
                # Keep the two buckets disjoint. If the requested sizes would
                # overlap, the names in the overlap are dropped from both
                # legs rather than being long and short at the same time.
                long_end = max(0, min(n_long, n - n_short))
                short_start = min(n, max(n_long, n - n_short))
                if long_end == 0 or short_start == n:
                    # Cannot form both legs in this sector; skip it so the
                    # sector never contributes a one-sided position.
                    continue
                ranked = ms.sort_values(ascending=False).index
                long_syms.extend(ranked[:long_end])
                short_syms.extend(ranked[short_start:])

            if not long_syms or not short_syms:
                continue

            # Equal-weight within the long leg and within the short leg.
            target[t, [col_pos[s] for s in long_syms]] = (
                self.gross_long / len(long_syms)
            )
            target[t, [col_pos[s] for s in short_syms]] = (
                -self.gross_short / len(short_syms)
            )

        weights = pd.DataFrame(target, index=data.index, columns=cols)
        weights = weights.ffill().fillna(0.0)
        # Lag by one row so weights only use information from earlier rows.
        return weights.shift(1).fillna(0.0)
|
||
|
||
|
||
# Names exported by `from <module> import *`: the strategy class and the sector loader.
__all__ = ["IndustryNeutralLSMomentum", "fetch_sp500_sectors"]
|