Files
quant/strategies/ls_momentum.py
Gahow Wang d086930ab3 feat: add new trading strategies
Add 12 strategy modules including adaptive blend, composite alpha,
cross-asset momentum, ensemble alpha, trend rider v5/v6, and more.
2026-05-14 12:54:05 +08:00

252 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Industry-neutral long/short momentum on the S&P 500.
Strategy
--------
At each rebalance date (default: monthly):
1. Compute 12-1 month momentum for every stock in the panel.
2. Group stocks by GICS sector.
3. Within each sector, rank by momentum.
4. Long the top `long_pct` (default 20%) of each sector.
5. Short the bottom `short_pct` (default 20%) of each sector.
6. Equal-weight within long-leg and short-leg, scaled so gross long = 1.0
and gross short = 1.0 → 200% gross exposure, ~0 net (β ≈ 0).
The β-neutrality comes from sector-level matching: when `long_pct` equals
`short_pct` and the two gross targets are equal, each sector contributes long
and short positions in equal $-amounts, so sector and (mostly) market
exposures cancel out.
Output
------
A weights DataFrame with positive (long) and negative (short) entries.
PIT-safe via terminal `.shift(1)`.
Costs
-----
Realistic backtest of L/S requires three additional costs not present in
long-only:
* borrow fee on the short leg (handled by the eval script, not here)
* higher slippage per turnover (this strategy churns more than V5)
* dividend payment on shorts (small for SP500 ~ 1.5% × |short_w|)
The strategy reports raw weights; the eval script applies costs.
"""
from __future__ import annotations
import os
import urllib.request
import io
import json
import numpy as np
import pandas as pd
from strategies.base import Strategy
# Relative path of the CSV file that fetch_sp500_sectors() reads from and
# writes to; it is resolved against the current working directory.
SECTOR_CACHE = "data/us_sectors.csv"
# Page downloaded by fetch_sp500_sectors(); its first HTML table is parsed
# for the ticker -> GICS sector mapping.
WIKIPEDIA_SP500_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
def fetch_sp500_sectors(force: bool = False) -> pd.DataFrame:
    """Return a DataFrame indexed by ticker with GICS sector / sub-industry.

    The mapping is cached at ``SECTOR_CACHE``. A cached file is reused only
    when it contains a ``"GICS Sector"`` column and more than 100 rows;
    otherwise (or when ``force`` is true) the table is downloaded from
    ``WIKIPEDIA_SP500_URL`` and the cache is rewritten.

    The table reflects *current* index membership and sector labels, so a
    backtest that uses it applies today's classification to every historical
    date.

    Args:
        force: Skip the cache and always download a fresh table.

    Returns:
        DataFrame indexed by ticker (dots replaced by dashes) holding whichever
        of the columns ``"GICS Sector"``, ``"GICS Sub-Industry"`` and
        ``"Security"`` are present in the downloaded table.

    Raises:
        urllib.error.URLError: If the download fails or times out.
        KeyError: If the downloaded table has no ``"Symbol"`` column.
    """
    if not force and os.path.exists(SECTOR_CACHE):
        try:
            cached = pd.read_csv(SECTOR_CACHE, index_col=0)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            # A truncated or corrupt cache should trigger a refetch, not a crash.
            cached = None
        if (
            cached is not None
            and "GICS Sector" in cached.columns
            and len(cached) > 100
        ):
            return cached

    print("--- Fetching S&P 500 GICS sectors from Wikipedia ---")
    headers = {"User-Agent": "Mozilla/5.0 (quant-backtest)"}
    req = urllib.request.Request(WIKIPEDIA_SP500_URL, headers=headers)
    # Bound the wait so a stalled connection cannot hang the caller forever.
    with urllib.request.urlopen(req, timeout=30) as resp:
        html = resp.read().decode("utf-8")

    tables = pd.read_html(io.StringIO(html))
    df = tables[0].rename(columns={"Symbol": "ticker"})
    # Share-class tickers are written with a dot in the table (e.g. "BRK.B");
    # the rest of the pipeline keys them with a dash instead.
    df["ticker"] = df["ticker"].str.replace(".", "-", regex=False)
    df = df.set_index("ticker")
    keep = [
        c
        for c in df.columns
        if c in ("GICS Sector", "GICS Sub-Industry", "Security")
    ]
    df = df[keep]

    cache_dir = os.path.dirname(SECTOR_CACHE)
    if cache_dir:
        # os.makedirs("") raises, so only create a directory when there is one.
        os.makedirs(cache_dir, exist_ok=True)
    df.to_csv(SECTOR_CACHE)
    print(f"--- Cached {len(df)} sector mappings to {SECTOR_CACHE} ---")
    return df
def _signal_mom_12_1(prices: pd.DataFrame) -> pd.DataFrame:
    """Return 12-1 momentum: the 231-row return that ended 21 rows ago.

    Higher values rank toward the long leg.
    """
    skip_rows, formation_rows = 21, 231
    lagged = prices.shift(skip_rows)
    return lagged.pct_change(formation_rows)
def _signal_reversal_1m(prices: pd.DataFrame) -> pd.DataFrame:
    """Return the negated 21-row return.

    The sign flip means the strongest recent gainers receive the lowest
    scores and therefore rank toward the short leg.
    """
    trailing_return = prices.pct_change(21)
    return -trailing_return
def _signal_reversal_5d(prices: pd.DataFrame) -> pd.DataFrame:
    """Return the negated 5-row return (short-horizon reversal score)."""
    trailing_return = prices.pct_change(5)
    return -trailing_return
def _signal_recovery_63(prices: pd.DataFrame) -> pd.DataFrame:
    """Return the fractional distance of each price above its 63-row low.

    Rows without a full 63-row window are NaN.
    """
    window = 63
    trailing_low = prices.rolling(window, min_periods=window).min()
    rebound = prices / trailing_low
    return rebound - 1
def _signal_low_vol(prices: pd.DataFrame) -> pd.DataFrame:
    """Return negated annualised 60-row realised volatility.

    Volatility is the rolling standard deviation of one-row returns (at least
    40 observations required), scaled by sqrt(252). Negating it makes calmer
    names score higher.
    """
    daily_returns = prices.pct_change(fill_method=None)
    rolling_std = daily_returns.rolling(60, min_periods=40).std()
    annualised = rolling_std * np.sqrt(252)
    return -annualised
def _signal_quality_mom(prices: pd.DataFrame) -> pd.DataFrame:
    """Return a blended score of momentum, consistency and low volatility.

    Three ingredients are each converted to a cross-sectional percentile rank
    (per row, NaNs kept) and combined as 40% / 30% / 30%:

    * 12-1 momentum (231-row return lagged by 21 rows),
    * share of positive one-row returns over 252 rows (min 126),
    * negated annualised 60-row volatility (min 40 observations).
    """

    def _row_pct_rank(frame: pd.DataFrame) -> pd.DataFrame:
        # Percentile rank across columns; missing inputs stay missing.
        return frame.rank(axis=1, pct=True, na_option="keep")

    daily_returns = prices.pct_change(fill_method=None)
    momentum = prices.shift(21).pct_change(231)
    up_day_share = (daily_returns > 0).rolling(252, min_periods=126).mean()
    annual_vol = daily_returns.rolling(60, min_periods=40).std() * np.sqrt(252)

    return (
        0.4 * _row_pct_rank(momentum)
        + 0.3 * _row_pct_rank(up_day_share)
        + 0.3 * _row_pct_rank(-annual_vol)
    )
def _signal_mom_x_lowvol(prices: pd.DataFrame) -> pd.DataFrame:
    """Return an equal blend of momentum rank and low-volatility rank.

    Both inputs are cross-sectional percentile ranks per row (NaNs kept):
    12-1 momentum and negated annualised 60-row volatility. Because volatile
    names are pushed up the ranking on the loser side, the lowest scores go
    to losers that are also high-vol relative to peers... only insofar as the
    two ranks agree; the blend itself is a plain 50/50 average.
    """
    rank_kwargs = {"axis": 1, "pct": True, "na_option": "keep"}

    daily_returns = prices.pct_change(fill_method=None)
    annual_vol = daily_returns.rolling(60, min_periods=40).std() * np.sqrt(252)
    momentum = prices.shift(21).pct_change(231)

    momentum_rank = momentum.rank(**rank_kwargs)
    calm_rank = (-annual_vol).rank(**rank_kwargs)
    return 0.5 * momentum_rank + 0.5 * calm_rank
# Maps the `signal_name` accepted by IndustryNeutralLSMomentum to the function
# that scores a price panel. Every function takes a prices DataFrame and
# returns a same-shaped DataFrame in which a higher value ranks toward the
# long leg.
SIGNAL_REGISTRY = {
"mom_12_1": _signal_mom_12_1,
"reversal_1m": _signal_reversal_1m,
"reversal_5d": _signal_reversal_5d,
"recovery_63": _signal_recovery_63,
"low_vol": _signal_low_vol,
"quality_mom": _signal_quality_mom,
"mom_x_lowvol": _signal_mom_x_lowvol,
}
class IndustryNeutralLSMomentum(Strategy):
    """Industry-neutral long/short portfolio with selectable signal.

    On every rebalance row the chosen signal is ranked inside each sector;
    the top ``long_pct`` of names are bought and the bottom ``short_pct`` are
    sold. All longs share ``gross_long`` equally and all shorts share
    ``gross_short`` equally. Targets are held between rebalances and the
    final frame is lagged by one row.

    Args:
        rebal_freq: Number of rows between rebalances.
        mom_lookback: Only used to size the warm-up (``mom_lookback + 5``
            rows); the signal functions use their own fixed windows.
        mom_skip: Stored on the instance but not read by this class.
        long_pct: Fraction of each sector's ranked names to go long.
        short_pct: Fraction of each sector's ranked names to go short.
        min_sector_size: Sectors with fewer scored names are skipped.
        sector_map: Optional ticker -> sector Series. When ``None`` the
            mapping comes from ``fetch_sp500_sectors()``.
        gross_long: Sum of the long weights on a rebalance row.
        gross_short: Absolute sum of the short weights on a rebalance row.
        signal_name: Key into ``SIGNAL_REGISTRY``.

    Raises:
        ValueError: If ``signal_name`` is not a key of ``SIGNAL_REGISTRY``.
    """

    def __init__(
        self,
        rebal_freq: int = 21,
        mom_lookback: int = 252,
        mom_skip: int = 21,
        long_pct: float = 0.20,
        short_pct: float = 0.20,
        min_sector_size: int = 5,
        sector_map: pd.Series | None = None,
        gross_long: float = 1.0,
        gross_short: float = 1.0,
        signal_name: str = "mom_12_1",
    ) -> None:
        # Reject an unknown signal before any state is stored.
        if signal_name not in SIGNAL_REGISTRY:
            raise ValueError(f"Unknown signal: {signal_name}")
        self.rebal_freq = rebal_freq
        self.mom_lookback = mom_lookback
        self.mom_skip = mom_skip
        self.long_pct = long_pct
        self.short_pct = short_pct
        self.min_sector_size = min_sector_size
        self.sector_map = sector_map
        self.gross_long = gross_long
        self.gross_short = gross_short
        self.signal_name = signal_name
        self.signal_func = SIGNAL_REGISTRY[signal_name]

    def _resolve_sector_map(self, columns: list[str]) -> pd.Series:
        """Return the sector label for each column (NaN where unknown)."""
        if self.sector_map is not None:
            return self.sector_map.reindex(columns)
        return fetch_sp500_sectors()["GICS Sector"].reindex(columns)

    def _select_legs(
        self,
        scores: pd.Series,
        sector_to_cols: dict[str, list[str]],
    ) -> tuple[list, list]:
        """Pick the long and short names for one rebalance row.

        Returns two lists of column labels: per-sector top ``long_pct`` and
        bottom ``short_pct`` of the non-NaN scores (at least one name each).
        Sectors with fewer than ``min_sector_size`` scored names are skipped.
        """
        long_names: list = []
        short_names: list = []
        for members in sector_to_cols.values():
            sector_scores = scores.reindex(members).dropna()
            n_scored = len(sector_scores)
            if n_scored < self.min_sector_size:
                continue
            n_long = max(1, int(round(n_scored * self.long_pct)))
            n_short = max(1, int(round(n_scored * self.short_pct)))
            ranked = sector_scores.sort_values(ascending=False)
            long_names.extend(ranked.head(n_long).index)
            short_names.extend(ranked.tail(n_short).index)
        return long_names, short_names

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Build the lagged target-weight frame for a price panel.

        Args:
            data: Price panel; rows are observations, columns are tickers.

        Returns:
            DataFrame shaped like ``data`` with positive (long) and negative
            (short) weights. Rows before the first rebalance are zero and the
            whole frame is shifted down one row so that weights decided on
            row ``t`` first appear on row ``t + 1``.
        """
        cols = list(data.columns)
        # One-off label -> position lookup; avoids a linear scan per write.
        col_pos = {c: j for j, c in enumerate(cols)}
        sectors = self._resolve_sector_map(cols)
        scores = self.signal_func(data)

        warmup = self.mom_lookback + 5
        rebal_rows = range(warmup, len(data), self.rebal_freq)

        # Group columns by sector; columns without a sector are never traded.
        sector_to_cols: dict[str, list[str]] = {}
        for c in cols:
            sector = sectors.get(c)
            if pd.isna(sector):
                continue
            sector_to_cols.setdefault(sector, []).append(c)

        # NaN marks "no decision on this row" so ffill can carry the last
        # rebalance forward; rebalance rows are explicitly zeroed first.
        target = np.full((len(data), len(cols)), np.nan)
        for t in rebal_rows:
            target[t, :] = 0.0
            long_names, short_names = self._select_legs(
                scores.iloc[t], sector_to_cols
            )
            if not long_names or not short_names:
                # One-sided book: stay flat until the next rebalance.
                continue
            long_w = self.gross_long / len(long_names)
            short_w = -self.gross_short / len(short_names)
            for sym in long_names:
                target[t, col_pos[sym]] = long_w
            # NOTE: shorts are written last, so a name picked for both legs
            # (possible when long_pct + short_pct > 1) ends up short.
            for sym in short_names:
                target[t, col_pos[sym]] = short_w

        weights = pd.DataFrame(target, index=data.index, columns=cols)
        weights = weights.ffill().fillna(0.0)
        weights.iloc[:warmup] = 0.0
        # Lag by one row so a weight never uses the price of the row it
        # is applied to.
        return weights.shift(1).fillna(0.0)
# Public API of this module; the signal functions and SIGNAL_REGISTRY are
# intentionally left out of star-imports.
__all__ = ["IndustryNeutralLSMomentum", "fetch_sp500_sectors"]