"""Lightweight strict US alpha pipeline.

Loads saved point-in-time (PIT) price/volume panels and ETF closes, scores
every ticker with two strategies, turns the scores into equal-weight top-n
long-only portfolios gated by a regime filter, and summarizes the resulting
equity curves over trailing windows.
"""

import numpy as np
import pandas as pd

import data_manager
import universe_history as uh
from research.event_factors import breakout_after_compression_score
from research.regime_filters import build_regime_filter
from research.us_alpha_report import summarize_equity_window
from research.us_universe import build_tradable_mask

# Thresholds forwarded verbatim to ``build_tradable_mask``.
MIN_PRICE = 5.0
MIN_DOLLAR_VOLUME = 20_000_000.0
MIN_HISTORY_DAYS = 252
MIN_VALID_VOLUME_DAYS = 40
LIQUIDITY_WINDOW = 60

# Look-back lengths (in rows of the close panel) for the rank-blend components.
TREND_WINDOW = 126
RECOVERY_WINDOW = 63
HIGH_PROX_WINDOW = 126

# Tickers requested from ``data_manager`` when no saved ETF file exists.
ETF_TICKERS = ["SPY", "QQQ", "IWM", "MDY", "XLK", "XLF", "XLI", "XLV"]


def _price_rank_blend_score(close: pd.DataFrame) -> pd.DataFrame:
    """Simple price-only cross-sectional blend, shifted for next-day trading.

    Three components are computed per ticker and ranked across columns
    (percentile ranks, NaNs kept as NaN):

    * trend: ``TREND_WINDOW``-row percentage change,
    * recovery: close relative to its ``RECOVERY_WINDOW``-row rolling minimum,
    * high proximity: close relative to its ``HIGH_PROX_WINDOW``-row rolling
      maximum.

    The score is the mean of the three ranks, shifted down by one row so the
    value on a given row only uses earlier rows. A NaN in any component makes
    the blended score NaN for that cell.

    Zero denominators are mapped to NaN for every component. Without this, a
    zero price inside the look-back turns ``recovery`` or ``trend`` into
    ``+inf``, which ``rank`` would place at the very top of the cross-section.
    """
    # A zero base price makes pct_change emit +/-inf; treat that as missing.
    trend = close.pct_change(TREND_WINDOW, fill_method=None).replace(
        [np.inf, -np.inf], np.nan
    )

    rolling_low = (
        close.rolling(RECOVERY_WINDOW, min_periods=RECOVERY_WINDOW)
        .min()
        .replace(0, np.nan)
    )
    recovery = close / rolling_low - 1

    rolling_high = (
        close.rolling(HIGH_PROX_WINDOW, min_periods=HIGH_PROX_WINDOW)
        .max()
        .replace(0, np.nan)
    )
    high_proximity = close / rolling_high

    trend_rank = trend.rank(axis=1, pct=True, na_option="keep")
    recovery_rank = recovery.rank(axis=1, pct=True, na_option="keep")
    high_rank = high_proximity.rank(axis=1, pct=True, na_option="keep")

    return ((trend_rank + recovery_rank + high_rank) / 3.0).shift(1)


def _build_equal_weight_portfolio(
    score: pd.DataFrame,
    tradable_mask: pd.DataFrame,
    regime_filter: pd.Series,
    top_n: int,
) -> pd.DataFrame:
    """Build equal-weight top-n long-only weights from aligned scores.

    Args:
        score: Per-row, per-ticker scores; higher is better.
        tradable_mask: Boolean frame whose index and columns define the
            output shape; scores outside the mask are ignored.
        regime_filter: Per-row flag; rows where it is false (or missing from
            the filter's index) receive all-zero weights.
        top_n: Maximum number of names held per row.

    Returns:
        A frame shaped like ``tradable_mask`` whose rows sum to 1.0 when at
        least one name is selected and are all 0.0 otherwise.
    """
    aligned_score = score.reindex(
        index=tradable_mask.index, columns=tradable_mask.columns
    )
    eligible_score = aligned_score.where(tradable_mask)

    # method="first" breaks ties by column order so at most top_n names are
    # picked; NaNs are ranked last and then explicitly excluded below.
    rank = eligible_score.rank(
        axis=1, ascending=False, na_option="bottom", method="first"
    )
    selected = (rank <= top_n) & eligible_score.notna()

    # Column vector so the per-row regime flag broadcasts across all tickers.
    regime_column = (
        regime_filter.reindex(tradable_mask.index, fill_value=False)
        .to_numpy()
        .reshape(-1, 1)
    )
    selected = selected & regime_column

    raw = selected.astype(float)
    # Rows with no selection would divide by zero; NaN them, then zero-fill.
    row_sums = raw.sum(axis=1).replace(0.0, np.nan)
    return raw.div(row_sums, axis=0).fillna(0.0)


def _equity_curve(close: pd.DataFrame, weights: pd.DataFrame) -> pd.Series:
    """Convert daily weights into a simple close-to-close equity curve.

    Row-over-row returns of ``close`` (missing returns counted as 0.0) are
    multiplied by the same-row weights, summed across tickers, and compounded
    from 1.0.
    """
    returns = close.pct_change(fill_method=None).fillna(0.0)
    portfolio_returns = (returns * weights).sum(axis=1)
    return (1.0 + portfolio_returns).cumprod()


def _read_panel_csv(path: str) -> pd.DataFrame:
    """Read a CSV panel using its first column as a parsed, sorted date index."""
    return pd.read_csv(path, index_col=0, parse_dates=True).sort_index()


def load_saved_pit_market_data(
    data_dir: str = "data", prefix: str = "us_pit"
) -> dict[str, pd.DataFrame]:
    """Load saved PIT OHLCV panels from disk.

    Reads ``{data_dir}/{prefix}_{field}.csv`` for the fields ``close``,
    ``high``, ``low`` and ``volume`` and returns them keyed by field name.

    Raises:
        FileNotFoundError: If any of the four files is missing.
    """
    return {
        field: _read_panel_csv(f"{data_dir}/{prefix}_{field}.csv")
        for field in ("close", "high", "low", "volume")
    }


def load_saved_etf_close(
    data_dir: str = "data", market: str = "us_etf"
) -> pd.DataFrame:
    """Load saved ETF closes or populate them on demand.

    Tries ``{data_dir}/{market}.csv`` first. If that file does not exist, asks
    ``data_manager.update_market_data`` for the ``close`` field of
    ``ETF_TICKERS``, temporarily pointing ``data_manager.DATA_DIR`` at
    ``data_dir`` for the duration of the call.
    """
    path = f"{data_dir}/{market}.csv"
    try:
        return _read_panel_csv(path)
    except FileNotFoundError:
        original_data_dir = data_manager.DATA_DIR
        try:
            data_manager.DATA_DIR = data_dir
            return data_manager.update_market_data(
                market, ETF_TICKERS, ["close"]
            )["close"]
        finally:
            # Always restore the module-level setting, even if the update fails.
            data_manager.DATA_DIR = original_data_dir


def run_alpha_pipeline(
    market_data: dict[str, pd.DataFrame],
    etf_close: pd.DataFrame,
    pit_membership=None,
    windows=(1, 2, 3, 5, 10),
    top_n: int = 10,
) -> pd.DataFrame:
    """Run a lightweight strict US alpha pipeline and summarize trailing windows.

    Args:
        market_data: Panels keyed by ``"close"``, ``"high"``, ``"low"`` and
            ``"volume"``. The close panel defines the index and columns the
            other three are aligned to.
        etf_close: ETF close panel handed to ``build_regime_filter``.
        pit_membership: Optional membership mask forwarded to
            ``build_tradable_mask``.
        windows: Window lengths passed one at a time to
            ``summarize_equity_window`` (named ``window_years`` there).
        top_n: Maximum number of names held per row in each strategy.

    Returns:
        A frame with one row per (strategy, window) pair, built from whatever
        ``summarize_equity_window`` returns.
    """
    close = market_data["close"].sort_index()
    # Reindexing onto the already-sorted close axes leaves the result sorted.
    high = market_data["high"].reindex(index=close.index, columns=close.columns)
    low = market_data["low"].reindex(index=close.index, columns=close.columns)
    volume = market_data["volume"].reindex(
        index=close.index, columns=close.columns
    )

    tradable_mask = build_tradable_mask(
        close=close,
        volume=volume,
        pit_membership=pit_membership,
        min_price=MIN_PRICE,
        min_dollar_volume=MIN_DOLLAR_VOLUME,
        min_history_days=MIN_HISTORY_DAYS,
        min_valid_volume_days=MIN_VALID_VOLUME_DAYS,
        liquidity_window=LIQUIDITY_WINDOW,
    )
    # Rows absent from the regime filter are treated as "regime off".
    regime_filter = build_regime_filter(etf_close).reindex(
        close.index, fill_value=False
    )

    strategy_scores = {
        "breakout_regime": breakout_after_compression_score(
            close, high, low, volume
        ),
        "rank_blend_regime": _price_rank_blend_score(close),
    }

    summary_rows = []
    for strategy_name, score in strategy_scores.items():
        weights = _build_equal_weight_portfolio(
            score, tradable_mask, regime_filter, top_n
        )
        equity = _equity_curve(close, weights)
        for window_years in windows:
            summary_rows.append(
                summarize_equity_window(equity, strategy_name, window_years)
            )
    return pd.DataFrame(summary_rows)


def run_saved_pit_alpha_pipeline(
    data_dir: str = "data",
    windows=(1, 2, 3, 5, 10),
    top_n: int = 10,
) -> pd.DataFrame:
    """Load saved PIT OHLCV inputs and run the strict alpha pipeline.

    The membership mask is built from ``uh.load_sp500_history()`` over the
    close panel's index and columns, then everything is handed to
    ``run_alpha_pipeline``.
    """
    market_data = load_saved_pit_market_data(data_dir=data_dir)
    etf_close = load_saved_etf_close(data_dir=data_dir)

    intervals = uh.load_sp500_history()
    pit_membership = uh.membership_mask(
        market_data["close"].index,
        intervals=intervals,
        tickers=list(market_data["close"].columns),
    )
    return run_alpha_pipeline(
        market_data=market_data,
        etf_close=etf_close,
        pit_membership=pit_membership,
        windows=windows,
        top_n=top_n,
    )


def main() -> None:
    """Run the saved-data pipeline with defaults and print the summary table."""
    summary = run_saved_pit_alpha_pipeline()
    print(summary.to_string(index=False))


if __name__ == "__main__":
    main()