feat: add PIT OHLCV runner and fetch support

This commit is contained in:
2026-04-18 14:59:48 +08:00
parent c015873ee1
commit f5e8c708f3
5 changed files with 221 additions and 16 deletions

View File

@@ -1,6 +1,8 @@
import numpy as np
import pandas as pd
import data_manager
import universe_history as uh
from research.event_factors import breakout_after_compression_score
from research.regime_filters import build_regime_filter
from research.us_alpha_report import summarize_equity_window
@@ -16,6 +18,7 @@ LIQUIDITY_WINDOW = 60
# Lookback lengths for the trend, recovery and high-proximity inputs.
# NOTE(review): presumably counted in trading days (126 ~ half a year,
# 63 ~ one quarter) — confirm against the code that consumes them.
TREND_WINDOW = 126
RECOVERY_WINDOW = 63
HIGH_PROX_WINDOW = 126
# Symbols requested from data_manager when the saved ETF close panel is
# missing on disk and has to be populated on demand.
ETF_TICKERS = ["SPY", "QQQ", "IWM", "MDY", "XLK", "XLF", "XLI", "XLV"]
def _price_rank_blend_score(close: pd.DataFrame) -> pd.DataFrame:
@@ -51,10 +54,36 @@ def _build_equal_weight_portfolio(
def _equity_curve(close: pd.DataFrame, weights: pd.DataFrame) -> pd.Series:
    """Convert daily weights into a simple close-to-close equity curve.

    Args:
        close: Price panel; simple returns are computed row over row and
            missing returns (including the first row) are treated as 0.0.
        weights: Weight panel that is multiplied element-wise with the
            return panel, so it must align with ``close`` on index and
            columns.

    Returns:
        The cumulative product of ``1 + portfolio_return`` for every row,
        where the portfolio return is the row-wise sum of weighted returns.
    """
    returns = close.pct_change(fill_method=None).fillna(0.0)
    # The weights are applied to the same row's return. A previous, immediately
    # overwritten assignment lagged them with ``weights.shift(1)``; it was a
    # dead store and has been dropped.
    # NOTE(review): this is only free of look-ahead if ``weights`` is already
    # lagged by the caller — confirm where the one-day shift now happens.
    portfolio_returns = (returns * weights).sum(axis=1)
    return (1.0 + portfolio_returns).cumprod()
def _read_panel_csv(path: str) -> pd.DataFrame:
    """Read a CSV panel keyed by its first column and return it in index order.

    The first column becomes the index and is parsed as dates; rows are then
    sorted by that index.
    """
    panel = pd.read_csv(path, index_col=0, parse_dates=True)
    return panel.sort_index()
def load_saved_pit_market_data(data_dir: str = "data", prefix: str = "us_pit") -> dict[str, pd.DataFrame]:
    """Load the saved PIT OHLCV panels from disk.

    Reads ``{data_dir}/{prefix}_{field}.csv`` for each of the fields
    ``close``, ``high``, ``low`` and ``volume`` (in that order) and returns
    them in a dict keyed by field name.
    """
    field_names = ("close", "high", "low", "volume")
    return {
        name: _read_panel_csv(f"{data_dir}/{prefix}_{name}.csv")
        for name in field_names
    }
def load_saved_etf_close(data_dir: str = "data", market: str = "us_etf") -> pd.DataFrame:
    """Return the saved ETF close panel, populating it on demand if absent.

    ``{data_dir}/{market}.csv`` is read first. If that file does not exist,
    ``data_manager.update_market_data`` is asked for the ``close`` field of
    ``ETF_TICKERS`` and its ``"close"`` entry is returned instead.
    """
    csv_path = f"{data_dir}/{market}.csv"
    try:
        saved_close = _read_panel_csv(csv_path)
    except FileNotFoundError:
        # data_manager reads its target directory from a module attribute, so
        # point it at ``data_dir`` for this call and always restore it after.
        previous_dir = data_manager.DATA_DIR
        try:
            data_manager.DATA_DIR = data_dir
            fetched = data_manager.update_market_data(market, ETF_TICKERS, ["close"])
            return fetched["close"]
        finally:
            data_manager.DATA_DIR = previous_dir
    else:
        return saved_close
def run_alpha_pipeline(
market_data,
etf_close,
@@ -93,3 +122,35 @@ def run_alpha_pipeline(
summary_rows.append(summarize_equity_window(equity, strategy_name, window_years))
return pd.DataFrame(summary_rows)
def run_saved_pit_alpha_pipeline(
    data_dir: str = "data",
    windows=(1, 2, 3, 5, 10),
    top_n: int = 10,
) -> pd.DataFrame:
    """Run the strict alpha pipeline on PIT OHLCV inputs saved on disk.

    Loads the PIT panels and the ETF closes from ``data_dir``, builds a
    membership mask from the S&P 500 history over the close panel's index
    and columns, and forwards everything to ``run_alpha_pipeline`` together
    with ``windows`` and ``top_n``.
    """
    panels = load_saved_pit_market_data(data_dir=data_dir)
    etf_panel = load_saved_etf_close(data_dir=data_dir)
    sp500_intervals = uh.load_sp500_history()

    close_panel = panels["close"]
    membership = uh.membership_mask(
        close_panel.index,
        intervals=sp500_intervals,
        tickers=list(close_panel.columns),
    )

    return run_alpha_pipeline(
        market_data=panels,
        etf_close=etf_panel,
        pit_membership=membership,
        windows=windows,
        top_n=top_n,
    )
def main() -> None:
    """Run the saved-PIT pipeline with default settings and print its summary."""
    report_text = run_saved_pit_alpha_pipeline().to_string(index=False)
    print(report_text)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()