feat: add PIT-aware tradable universe mask
This commit is contained in:
53
research/us_universe.py
Normal file
53
research/us_universe.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def build_tradable_mask(
|
||||||
|
close: pd.DataFrame,
|
||||||
|
volume: pd.DataFrame,
|
||||||
|
pit_membership: pd.DataFrame | None,
|
||||||
|
min_price: float,
|
||||||
|
min_dollar_volume: float,
|
||||||
|
min_history_days: int,
|
||||||
|
min_valid_volume_days: int,
|
||||||
|
liquidity_window: int = 60,
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""Build a point-in-time tradable universe mask using only lagged inputs."""
|
||||||
|
close = close.sort_index()
|
||||||
|
volume = volume.reindex(index=close.index, columns=close.columns).sort_index()
|
||||||
|
if pit_membership is None:
|
||||||
|
pit_mask = pd.DataFrame(True, index=close.index, columns=close.columns)
|
||||||
|
else:
|
||||||
|
pit_mask = pit_membership.reindex(
|
||||||
|
index=close.index,
|
||||||
|
columns=close.columns,
|
||||||
|
fill_value=False,
|
||||||
|
)
|
||||||
|
pit_mask = pit_mask.where(pit_mask.notna(), False).astype(bool)
|
||||||
|
|
||||||
|
eligible_close = close.where(pit_mask)
|
||||||
|
eligible_volume = volume.where(pit_mask)
|
||||||
|
|
||||||
|
lagged_close = eligible_close.shift(1)
|
||||||
|
lagged_volume = eligible_volume.shift(1)
|
||||||
|
lagged_dollar_volume = lagged_close * lagged_volume
|
||||||
|
|
||||||
|
price_ok = lagged_close.gt(min_price)
|
||||||
|
liquidity_ok = (
|
||||||
|
lagged_dollar_volume.rolling(window=liquidity_window, min_periods=1).median().gt(min_dollar_volume)
|
||||||
|
)
|
||||||
|
history_ok = (
|
||||||
|
lagged_close.notna()
|
||||||
|
.rolling(window=min_history_days, min_periods=min_history_days)
|
||||||
|
.sum()
|
||||||
|
.ge(min_history_days)
|
||||||
|
)
|
||||||
|
valid_volume_ok = (
|
||||||
|
lagged_dollar_volume.notna()
|
||||||
|
.rolling(window=liquidity_window, min_periods=1)
|
||||||
|
.sum()
|
||||||
|
.ge(min_valid_volume_days)
|
||||||
|
)
|
||||||
|
|
||||||
|
mask = price_ok & liquidity_ok & history_ok & valid_volume_ok
|
||||||
|
mask = mask & pit_mask
|
||||||
|
return mask.astype(bool)
|
||||||
213
tests/test_us_universe.py
Normal file
213
tests/test_us_universe.py
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
import unittest
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
class BuildTradableMaskTests(unittest.TestCase):
    """Behavioral tests for ``research.us_universe.build_tradable_mask``."""

    def test_build_tradable_mask_uses_only_lagged_price_and_liquidity_inputs(self):
        from research.us_universe import build_tradable_mask

        # Day 1 has a sub-threshold close and no volume; only the last day
        # has two prior days that satisfy every filter.
        dates = pd.date_range("2024-01-01", periods=4, freq="D")
        close = pd.DataFrame({"AAA": [4.0, 10.0, 10.0, 10.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [float("nan"), 200.0, 200.0, 200.0]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=None,
            min_price=5.0,
            min_dollar_volume=1000.0,
            min_history_days=2,
            min_valid_volume_days=2,
            liquidity_window=2,
        )

        expected = pd.DataFrame({"AAA": [False, False, False, True]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_uses_only_lagged_history(self):
        from research.us_universe import build_tradable_mask

        # The missing close on day 2 leaves no row with two consecutive
        # lagged closes, so nothing qualifies.
        dates = pd.date_range("2024-01-01", periods=4, freq="D")
        close = pd.DataFrame({"AAA": [10.0, float("nan"), 10.0, 10.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [200.0, 200.0, 200.0, 200.0]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=None,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=2,
            min_valid_volume_days=1,
            liquidity_window=1,
        )

        expected = pd.DataFrame({"AAA": [False, False, False, False]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_requires_membership_history_before_first_eligible_day(self):
        from research.us_universe import build_tradable_mask

        # Membership begins on day 3; the first tradable day is day 4
        # because the lagged inputs must come from a member day.
        dates = pd.date_range("2024-01-01", periods=4, freq="D")
        close = pd.DataFrame({"AAA": [10.0, 10.0, 10.0, 10.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [200.0, 200.0, 200.0, 200.0]}, index=dates)
        pit_membership = pd.DataFrame({"AAA": [False, False, True, True]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=pit_membership,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=1,
            min_valid_volume_days=1,
            liquidity_window=1,
        )

        expected = pd.DataFrame({"AAA": [False, False, False, True]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_aligns_pit_membership_without_truthy_carryover(self):
        from research.us_universe import build_tradable_mask

        dates = pd.date_range("2024-01-01", periods=3, freq="D")
        close = pd.DataFrame(
            {
                "AAA": [10.0, 10.0, 10.0],
                "BBB": [12.0, 12.0, 12.0],
            },
            index=dates,
        )
        volume = pd.DataFrame(
            {
                "AAA": [1_000_000.0, 1_000_000.0, 1_000_000.0],
                "BBB": [1_000_000.0, 1_000_000.0, 1_000_000.0],
            },
            index=dates,
        )
        # Membership is offset by one day and covers a different symbol set:
        # AAA is absent from it and CCC has no prices.
        pit_membership = pd.DataFrame(
            {
                "BBB": [True, True, False],
                "CCC": [True, True, True],
            },
            index=pd.date_range("2024-01-02", periods=3, freq="D"),
        )

        # Record every warning so the alignment can be checked for silence.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            mask = build_tradable_mask(
                close=close,
                volume=volume,
                pit_membership=pit_membership,
                min_price=5.0,
                min_dollar_volume=1_000.0,
                min_history_days=1,
                min_valid_volume_days=1,
                liquidity_window=1,
            )

        self.assertEqual(len(caught), 0)
        expected = pd.DataFrame(
            {
                "AAA": [False, False, False],
                "BBB": [False, False, True],
            },
            index=dates,
            dtype=bool,
        )
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_treats_missing_membership_cells_as_false(self):
        from research.us_universe import build_tradable_mask

        # The pd.NA on day 2 must count as "not a member", which leaves no
        # day with both a member lag and current membership.
        dates = pd.date_range("2024-01-01", periods=3, freq="D")
        close = pd.DataFrame({"AAA": [10.0, 10.0, 10.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [1_000_000.0, 1_000_000.0, 1_000_000.0]}, index=dates)
        pit_membership = pd.DataFrame(
            {"AAA": [True, pd.NA, True]},
            index=dates,
            dtype="boolean",
        )

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=pit_membership,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=1,
            min_valid_volume_days=1,
            liquidity_window=1,
        )

        expected = pd.DataFrame({"AAA": [False, False, False]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_uses_strict_thresholds(self):
        from research.us_universe import build_tradable_mask

        # A close exactly equal to min_price must not pass.
        dates = pd.date_range("2024-01-01", periods=3, freq="D")
        close = pd.DataFrame({"AAA": [5.0, 5.0, 5.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [300.0, 300.0, 300.0]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=None,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=1,
            min_valid_volume_days=1,
            liquidity_window=1,
        )

        expected = pd.DataFrame({"AAA": [False, False, False]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_uses_strict_dollar_volume_threshold(self):
        from research.us_universe import build_tradable_mask

        # 8.0 * 125.0 equals min_dollar_volume exactly and must not pass.
        dates = pd.date_range("2024-01-01", periods=3, freq="D")
        close = pd.DataFrame({"AAA": [8.0, 8.0, 8.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [125.0, 125.0, 125.0]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=None,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=1,
            min_valid_volume_days=1,
            liquidity_window=1,
        )

        expected = pd.DataFrame({"AAA": [False, False, False]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)

    def test_build_tradable_mask_requires_valid_dollar_volume_history(self):
        from research.us_universe import build_tradable_mask

        # The missing close on day 2 leaves at most one valid dollar-volume
        # observation inside any two-row lagged window.
        dates = pd.date_range("2024-01-01", periods=4, freq="D")
        close = pd.DataFrame({"AAA": [10.0, float("nan"), 10.0, 10.0]}, index=dates)
        volume = pd.DataFrame({"AAA": [200.0, 200.0, 200.0, 200.0]}, index=dates)

        mask = build_tradable_mask(
            close=close,
            volume=volume,
            pit_membership=None,
            min_price=5.0,
            min_dollar_volume=1_000.0,
            min_history_days=1,
            min_valid_volume_days=2,
            liquidity_window=2,
        )

        expected = pd.DataFrame({"AAA": [False, False, False, False]}, index=dates, dtype=bool)
        pd.testing.assert_frame_equal(mask, expected)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user