feat: add PIT OHLCV runner and fetch support
This commit is contained in:
@@ -25,6 +25,15 @@ YEARS = 10
|
||||
BATCH_SIZE = 50
|
||||
|
||||
|
||||
def _field_out_paths() -> dict[str, str]:
|
||||
return {
|
||||
"Close": os.path.join(DATA_DIR, "us_pit_close.csv"),
|
||||
"High": os.path.join(DATA_DIR, "us_pit_high.csv"),
|
||||
"Low": os.path.join(DATA_DIR, "us_pit_low.csv"),
|
||||
"Volume": os.path.join(DATA_DIR, "us_pit_volume.csv"),
|
||||
}
|
||||
|
||||
|
||||
def fetch_all_historical(force: bool = False) -> pd.DataFrame:
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
intervals = uh.load_sp500_history()
|
||||
@@ -74,8 +83,41 @@ def fetch_all_historical(force: bool = False) -> pd.DataFrame:
|
||||
return combined
|
||||
|
||||
|
||||
def fetch_all_historical_ohlcv(force: bool = False) -> dict[str, pd.DataFrame]:
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
intervals = uh.load_sp500_history()
|
||||
tickers = uh.all_tickers_ever(intervals) + ["SPY"]
|
||||
tickers = sorted(set(tickers))
|
||||
start = (datetime.now() - timedelta(days=365 * YEARS)).strftime("%Y-%m-%d")
|
||||
panels = _download_batched_fields(tickers, start=start, fields=["Close", "High", "Low", "Volume"])
|
||||
if not panels:
|
||||
raise RuntimeError("No PIT OHLCV data downloaded")
|
||||
|
||||
close = panels["Close"]
|
||||
close.to_csv(OUT_PATH)
|
||||
print(f"--- Saved {close.shape} to {OUT_PATH} ---")
|
||||
result: dict[str, pd.DataFrame] = {"close": close}
|
||||
for field, path in _field_out_paths().items():
|
||||
panel = panels[field]
|
||||
panel.to_csv(path)
|
||||
print(f"--- Saved {panel.shape} to {path} ---")
|
||||
result[field.lower()] = panel
|
||||
return result
|
||||
|
||||
|
||||
def _download_batched(tickers: list[str], start: str) -> pd.DataFrame | None:
|
||||
frames = []
|
||||
panels = _download_batched_fields(tickers, start=start, fields=["Close"])
|
||||
if not panels:
|
||||
return None
|
||||
return panels["Close"]
|
||||
|
||||
|
||||
def _download_batched_fields(
|
||||
tickers: list[str],
|
||||
start: str,
|
||||
fields: list[str],
|
||||
) -> dict[str, pd.DataFrame]:
|
||||
frames = {field: [] for field in fields}
|
||||
n = len(tickers)
|
||||
for i in range(0, n, BATCH_SIZE):
|
||||
batch = tickers[i:i + BATCH_SIZE]
|
||||
@@ -85,19 +127,24 @@ def _download_batched(tickers: list[str], start: str) -> pd.DataFrame | None:
|
||||
progress=False, threads=True)
|
||||
if raw.empty:
|
||||
continue
|
||||
if isinstance(raw.columns, pd.MultiIndex):
|
||||
close = raw["Close"]
|
||||
else:
|
||||
close = raw[["Close"]].rename(columns={"Close": batch[0]})
|
||||
close = close.dropna(axis=1, how="all")
|
||||
if not close.empty:
|
||||
frames.append(close)
|
||||
for field in fields:
|
||||
if isinstance(raw.columns, pd.MultiIndex):
|
||||
panel = raw[field]
|
||||
else:
|
||||
panel = raw[[field]].rename(columns={field: batch[0]})
|
||||
panel = panel.dropna(axis=1, how="all")
|
||||
if not panel.empty:
|
||||
frames[field].append(panel)
|
||||
except Exception as e:
|
||||
print(f" batch failed: {e}")
|
||||
if not frames:
|
||||
return None
|
||||
result = pd.concat(frames, axis=1).sort_index()
|
||||
result = result.loc[:, ~result.columns.duplicated()]
|
||||
result = {}
|
||||
for field, field_frames in frames.items():
|
||||
if field_frames:
|
||||
panel = pd.concat(field_frames, axis=1).sort_index()
|
||||
panel = panel.loc[:, ~panel.columns.duplicated()]
|
||||
result[field] = panel
|
||||
else:
|
||||
result[field] = pd.DataFrame()
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import data_manager
|
||||
import universe_history as uh
|
||||
from research.event_factors import breakout_after_compression_score
|
||||
from research.regime_filters import build_regime_filter
|
||||
from research.us_alpha_report import summarize_equity_window
|
||||
@@ -16,6 +18,7 @@ LIQUIDITY_WINDOW = 60
|
||||
TREND_WINDOW = 126
|
||||
RECOVERY_WINDOW = 63
|
||||
HIGH_PROX_WINDOW = 126
|
||||
ETF_TICKERS = ["SPY", "QQQ", "IWM", "MDY", "XLK", "XLF", "XLI", "XLV"]
|
||||
|
||||
|
||||
def _price_rank_blend_score(close: pd.DataFrame) -> pd.DataFrame:
|
||||
@@ -51,10 +54,36 @@ def _build_equal_weight_portfolio(
|
||||
def _equity_curve(close: pd.DataFrame, weights: pd.DataFrame) -> pd.Series:
|
||||
"""Convert daily weights into a simple close-to-close equity curve."""
|
||||
returns = close.pct_change(fill_method=None).fillna(0.0)
|
||||
portfolio_returns = (returns * weights.shift(1).fillna(0.0)).sum(axis=1)
|
||||
portfolio_returns = (returns * weights).sum(axis=1)
|
||||
return (1.0 + portfolio_returns).cumprod()
|
||||
|
||||
|
||||
def _read_panel_csv(path: str) -> pd.DataFrame:
|
||||
return pd.read_csv(path, index_col=0, parse_dates=True).sort_index()
|
||||
|
||||
|
||||
def load_saved_pit_market_data(data_dir: str = "data", prefix: str = "us_pit") -> dict[str, pd.DataFrame]:
|
||||
"""Load saved PIT OHLCV panels from disk."""
|
||||
panels = {}
|
||||
for field in ("close", "high", "low", "volume"):
|
||||
panels[field] = _read_panel_csv(f"{data_dir}/{prefix}_{field}.csv")
|
||||
return panels
|
||||
|
||||
|
||||
def load_saved_etf_close(data_dir: str = "data", market: str = "us_etf") -> pd.DataFrame:
|
||||
"""Load saved ETF closes or populate them on demand."""
|
||||
path = f"{data_dir}/{market}.csv"
|
||||
try:
|
||||
return _read_panel_csv(path)
|
||||
except FileNotFoundError:
|
||||
original_data_dir = data_manager.DATA_DIR
|
||||
try:
|
||||
data_manager.DATA_DIR = data_dir
|
||||
return data_manager.update_market_data(market, ETF_TICKERS, ["close"])["close"]
|
||||
finally:
|
||||
data_manager.DATA_DIR = original_data_dir
|
||||
|
||||
|
||||
def run_alpha_pipeline(
|
||||
market_data,
|
||||
etf_close,
|
||||
@@ -93,3 +122,35 @@ def run_alpha_pipeline(
|
||||
summary_rows.append(summarize_equity_window(equity, strategy_name, window_years))
|
||||
|
||||
return pd.DataFrame(summary_rows)
|
||||
|
||||
|
||||
def run_saved_pit_alpha_pipeline(
|
||||
data_dir: str = "data",
|
||||
windows=(1, 2, 3, 5, 10),
|
||||
top_n: int = 10,
|
||||
) -> pd.DataFrame:
|
||||
"""Load saved PIT OHLCV inputs and run the strict alpha pipeline."""
|
||||
market_data = load_saved_pit_market_data(data_dir=data_dir)
|
||||
etf_close = load_saved_etf_close(data_dir=data_dir)
|
||||
intervals = uh.load_sp500_history()
|
||||
pit_membership = uh.membership_mask(
|
||||
market_data["close"].index,
|
||||
intervals=intervals,
|
||||
tickers=list(market_data["close"].columns),
|
||||
)
|
||||
return run_alpha_pipeline(
|
||||
market_data=market_data,
|
||||
etf_close=etf_close,
|
||||
pit_membership=pit_membership,
|
||||
windows=windows,
|
||||
top_n=top_n,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
summary = run_saved_pit_alpha_pipeline()
|
||||
print(summary.to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -8,8 +8,8 @@ TRADING_DAYS_PER_YEAR = 252
|
||||
def summarize_equity_window(equity: pd.Series, strategy: str, window_years: int | float) -> dict:
|
||||
"""Summarize a strategy equity curve over a trailing trading-day window."""
|
||||
window_days = max(int(window_years * TRADING_DAYS_PER_YEAR), 1)
|
||||
window_equity = equity.tail(window_days + 1).dropna()
|
||||
if len(window_equity) < 2:
|
||||
clean_equity = equity.dropna()
|
||||
if len(clean_equity) < window_days + 1:
|
||||
return {
|
||||
"strategy": strategy,
|
||||
"window_years": window_years,
|
||||
@@ -18,6 +18,7 @@ def summarize_equity_window(equity: pd.Series, strategy: str, window_years: int
|
||||
"MaxDD": np.nan,
|
||||
"TotalRet": np.nan,
|
||||
}
|
||||
window_equity = clean_equity.tail(window_days + 1)
|
||||
|
||||
daily = window_equity.pct_change(fill_method=None).dropna()
|
||||
total_ret = window_equity.iloc[-1] / window_equity.iloc[0] - 1
|
||||
|
||||
Reference in New Issue
Block a user