# Listing metadata: 157 lines, 5.6 KiB, Python.
import numpy as np
|
|
import pandas as pd
|
|
|
|
import data_manager
|
|
import universe_history as uh
|
|
from research.event_factors import breakout_after_compression_score
|
|
from research.regime_filters import build_regime_filter
|
|
from research.us_alpha_report import summarize_equity_window
|
|
from research.us_universe import build_tradable_mask
|
|
|
|
|
|
# Universe eligibility thresholds; forwarded unchanged to build_tradable_mask.
MIN_PRICE: float = 5.0
MIN_DOLLAR_VOLUME: float = 20_000_000.0
MIN_HISTORY_DAYS: int = 252
MIN_VALID_VOLUME_DAYS: int = 40
LIQUIDITY_WINDOW: int = 60

# Lookback lengths (in rows) used by the price-only rank blend score.
TREND_WINDOW: int = 126
RECOVERY_WINDOW: int = 63
HIGH_PROX_WINDOW: int = 126

# Tickers requested when the ETF close panel has to be populated on demand.
ETF_TICKERS: list[str] = [
    "SPY",
    "QQQ",
    "IWM",
    "MDY",
    "XLK",
    "XLF",
    "XLI",
    "XLV",
]
|
|
|
|
|
|
def _price_rank_blend_score(close: pd.DataFrame) -> pd.DataFrame:
    """Blend three price-only cross-sectional ranks, shifted for next-day trading.

    For every row of ``close`` three raw signals are computed per column:

    * trend: percentage change over ``TREND_WINDOW`` rows;
    * recovery: close relative to its rolling ``RECOVERY_WINDOW`` minimum, minus 1;
    * high proximity: close divided by its rolling ``HIGH_PROX_WINDOW`` maximum.

    Each signal is turned into a percentile rank across columns (NaNs stay
    NaN), the three ranks are averaged, and the result is shifted down one
    row so the score on a given row is built from earlier rows only.

    Zero denominators are masked to NaN for all three signals. Without that,
    a single zero price inside a window yields ``inf``, which ``rank`` places
    at the top percentile and would hand the highest score to a column with
    bad data.

    Args:
        close: Price panel; assumed to be one row per date and one column per
            ticker, sorted by date.

    Returns:
        DataFrame with the same index and columns as ``close``. A cell is NaN
        whenever any of the three component ranks is unavailable.
    """
    # pct_change divides by the value TREND_WINDOW rows back; a zero base
    # produces +/-inf, which must not take part in the ranking.
    trend = close.pct_change(TREND_WINDOW, fill_method=None).replace(
        [np.inf, -np.inf], np.nan
    )

    rolling_low = close.rolling(RECOVERY_WINDOW, min_periods=RECOVERY_WINDOW).min()
    recovery = close / rolling_low.replace(0, np.nan) - 1

    rolling_high = close.rolling(HIGH_PROX_WINDOW, min_periods=HIGH_PROX_WINDOW).max()
    high_proximity = close / rolling_high.replace(0, np.nan)

    trend_rank, recovery_rank, high_rank = (
        signal.rank(axis=1, pct=True, na_option="keep")
        for signal in (trend, recovery, high_proximity)
    )
    blended = (trend_rank + recovery_rank + high_rank) / 3.0
    return blended.shift(1)
|
|
|
|
|
|
def _build_equal_weight_portfolio(
    score: pd.DataFrame,
    tradable_mask: pd.DataFrame,
    regime_filter: pd.Series,
    top_n: int,
) -> pd.DataFrame:
    """Turn scores into equal-weight, long-only weights for the top ``top_n`` names.

    Args:
        score: Score panel; reindexed onto the index and columns of
            ``tradable_mask`` before use.
        tradable_mask: Boolean panel; scores are discarded wherever it is False.
        regime_filter: Per-row switch; rows that are False or missing from it
            receive zero weight everywhere.
        top_n: Maximum number of columns held on any row.

    Returns:
        Weight panel shaped like ``tradable_mask``. Each row either sums to 1
        (split evenly across the selected columns) or is all zeros.
    """
    dates = tradable_mask.index
    candidates = score.reindex(index=dates, columns=tradable_mask.columns).where(tradable_mask)

    # Highest score gets rank 1; ties are broken by column order and NaNs go last.
    descending_rank = candidates.rank(axis=1, ascending=False, na_option="bottom", method="first")

    # The notna() guard stops bottom-ranked NaNs from filling unused slots.
    picks = (descending_rank <= top_n) & candidates.notna()

    # Column vector so the per-row switch broadcasts across every ticker.
    regime_on = regime_filter.reindex(dates, fill_value=False).to_numpy()[:, None]
    picks = picks & regime_on

    holdings = picks.astype(float)
    # Empty rows would divide by zero; route them through NaN and back to 0.
    names_held = holdings.sum(axis=1).replace(0.0, np.nan)
    return holdings.div(names_held, axis=0).fillna(0.0)
|
|
|
|
|
|
def _equity_curve(close: pd.DataFrame, weights: pd.DataFrame) -> pd.Series:
    """Compound weighted close-to-close returns into an equity curve.

    Args:
        close: Price panel used to derive row-over-row simple returns.
        weights: Weights multiplied element-wise with the same-row returns.

    Returns:
        Series of cumulative growth per row. The first row is 1.0 because
        missing returns (including the first row) are treated as zero.
    """
    daily_returns = close.pct_change(fill_method=None).fillna(0.0)
    weighted_returns = daily_returns.mul(weights)
    growth_factors = weighted_returns.sum(axis=1).add(1.0)
    return growth_factors.cumprod()
|
|
|
|
|
|
def _read_panel_csv(path: str) -> pd.DataFrame:
    """Read a CSV panel whose first column is a date index and sort it by index.

    Args:
        path: Location of the CSV file.

    Returns:
        DataFrame indexed by the parsed first column, in ascending index order.
    """
    panel = pd.read_csv(path, index_col=0, parse_dates=True)
    return panel.sort_index()
|
|
|
|
|
|
def load_saved_pit_market_data(data_dir: str = "data", prefix: str = "us_pit") -> dict[str, pd.DataFrame]:
    """Load the saved PIT close/high/low/volume panels from disk.

    Args:
        data_dir: Directory that holds the CSV files.
        prefix: File-name prefix; each panel is read from
            ``{data_dir}/{prefix}_{field}.csv``.

    Returns:
        Mapping with the keys ``"close"``, ``"high"``, ``"low"`` and
        ``"volume"``, each holding the corresponding panel.

    Raises:
        FileNotFoundError: Propagated from the CSV reader when a file is missing.
    """
    fields = ("close", "high", "low", "volume")
    return {field: _read_panel_csv(f"{data_dir}/{prefix}_{field}.csv") for field in fields}
|
|
|
|
|
|
def load_saved_etf_close(data_dir: str = "data", market: str = "us_etf") -> pd.DataFrame:
    """Return the ETF close panel, populating it on demand when no file exists.

    The panel is read from ``{data_dir}/{market}.csv``. If that file is
    missing, ``data_manager.update_market_data`` is called for ``ETF_TICKERS``
    with ``data_manager.DATA_DIR`` temporarily pointed at ``data_dir``.

    Args:
        data_dir: Directory that holds (or should receive) the ETF data.
        market: Market key; doubles as the CSV file stem.

    Returns:
        The close panel, either as read from disk or as returned under the
        ``"close"`` key by ``data_manager.update_market_data``.
    """
    path = f"{data_dir}/{market}.csv"
    try:
        cached = _read_panel_csv(path)
    except FileNotFoundError:
        previous_dir = data_manager.DATA_DIR
        try:
            # Redirect the data manager for this one call only.
            data_manager.DATA_DIR = data_dir
            refreshed = data_manager.update_market_data(market, ETF_TICKERS, ["close"])
            return refreshed["close"]
        finally:
            # Always restore the module-level setting, even if the update fails.
            data_manager.DATA_DIR = previous_dir
    else:
        return cached
|
|
|
|
|
|
def run_alpha_pipeline(
    market_data,
    etf_close,
    pit_membership=None,
    windows=(1, 2, 3, 5, 10),
    top_n=10,
) -> pd.DataFrame:
    """Score, build portfolios and summarize trailing windows for each strategy.

    Args:
        market_data: Mapping with ``"close"``, ``"high"``, ``"low"`` and
            ``"volume"`` panels; the last three are aligned to the close panel.
        etf_close: ETF close panel passed to ``build_regime_filter``.
        pit_membership: Optional membership input forwarded to
            ``build_tradable_mask``.
        windows: Window lengths handed one by one to
            ``summarize_equity_window`` (presumably in years, judging by the
            original loop variable name).
        top_n: Maximum number of names held per row in each portfolio.

    Returns:
        DataFrame built from the rows returned by ``summarize_equity_window``,
        one per (strategy, window) pair, strategies in definition order.
    """
    close = market_data["close"].sort_index()
    high, low, volume = (
        market_data[field].reindex(index=close.index, columns=close.columns).sort_index()
        for field in ("high", "low", "volume")
    )

    universe_settings = {
        "min_price": MIN_PRICE,
        "min_dollar_volume": MIN_DOLLAR_VOLUME,
        "min_history_days": MIN_HISTORY_DAYS,
        "min_valid_volume_days": MIN_VALID_VOLUME_DAYS,
        "liquidity_window": LIQUIDITY_WINDOW,
    }
    tradable_mask = build_tradable_mask(
        close=close,
        volume=volume,
        pit_membership=pit_membership,
        **universe_settings,
    )

    # Rows the regime filter does not cover are treated as switched off.
    regime_filter = build_regime_filter(etf_close).reindex(close.index, fill_value=False)

    # Both score panels are computed up front, before any portfolio is built.
    scores_by_strategy = {
        "breakout_regime": breakout_after_compression_score(close, high, low, volume),
        "rank_blend_regime": _price_rank_blend_score(close),
    }

    rows = []
    for name, strategy_score in scores_by_strategy.items():
        weights = _build_equal_weight_portfolio(strategy_score, tradable_mask, regime_filter, top_n)
        equity = _equity_curve(close, weights)
        rows.extend(summarize_equity_window(equity, name, years) for years in windows)

    return pd.DataFrame(rows)
|
|
|
|
|
|
def run_saved_pit_alpha_pipeline(
    data_dir: str = "data",
    windows=(1, 2, 3, 5, 10),
    top_n: int = 10,
) -> pd.DataFrame:
    """Load the saved PIT inputs from ``data_dir`` and run the alpha pipeline.

    Args:
        data_dir: Directory holding the saved market and ETF panels.
        windows: Window lengths forwarded to ``run_alpha_pipeline``.
        top_n: Portfolio size forwarded to ``run_alpha_pipeline``.

    Returns:
        The summary DataFrame produced by ``run_alpha_pipeline``.
    """
    panels = load_saved_pit_market_data(data_dir=data_dir)
    etf_panel = load_saved_etf_close(data_dir=data_dir)

    # Membership is built on the same dates and tickers as the close panel.
    sp500_intervals = uh.load_sp500_history()
    close_panel = panels["close"]
    membership = uh.membership_mask(
        close_panel.index,
        intervals=sp500_intervals,
        tickers=list(close_panel.columns),
    )

    return run_alpha_pipeline(
        market_data=panels,
        etf_close=etf_panel,
        pit_membership=membership,
        windows=windows,
        top_n=top_n,
    )
|
|
|
|
|
|
def main() -> None:
    """Run the saved-data pipeline with its defaults and print the summary table."""
    report = run_saved_pit_alpha_pipeline()
    rendered = report.to_string(index=False)
    print(rendered)


# Only run the pipeline when the file is executed as a script.
if __name__ == "__main__":
    main()
|