From 7853eafe55747cb38a6ecfbcfd8dd9a52803c863 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 18 Apr 2026 00:23:07 +0800 Subject: [PATCH] feat: add PIT-aware tradable universe mask --- research/us_universe.py | 53 ++++++++++ tests/test_us_universe.py | 213 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 research/us_universe.py create mode 100644 tests/test_us_universe.py diff --git a/research/us_universe.py b/research/us_universe.py new file mode 100644 index 0000000..89f7c23 --- /dev/null +++ b/research/us_universe.py @@ -0,0 +1,53 @@ +import pandas as pd + + +def build_tradable_mask( + close: pd.DataFrame, + volume: pd.DataFrame, + pit_membership: pd.DataFrame | None, + min_price: float, + min_dollar_volume: float, + min_history_days: int, + min_valid_volume_days: int, + liquidity_window: int = 60, +) -> pd.DataFrame: + """Build a point-in-time tradable universe mask using only lagged inputs.""" + close = close.sort_index() + volume = volume.reindex(index=close.index, columns=close.columns).sort_index() + if pit_membership is None: + pit_mask = pd.DataFrame(True, index=close.index, columns=close.columns) + else: + pit_mask = pit_membership.reindex( + index=close.index, + columns=close.columns, + fill_value=False, + ) + pit_mask = pit_mask.where(pit_mask.notna(), False).astype(bool) + + eligible_close = close.where(pit_mask) + eligible_volume = volume.where(pit_mask) + + lagged_close = eligible_close.shift(1) + lagged_volume = eligible_volume.shift(1) + lagged_dollar_volume = lagged_close * lagged_volume + + price_ok = lagged_close.gt(min_price) + liquidity_ok = ( + lagged_dollar_volume.rolling(window=liquidity_window, min_periods=1).median().gt(min_dollar_volume) + ) + history_ok = ( + lagged_close.notna() + .rolling(window=min_history_days, min_periods=min_history_days) + .sum() + .ge(min_history_days) + ) + valid_volume_ok = ( + lagged_dollar_volume.notna() + .rolling(window=liquidity_window, min_periods=1) + 
.sum() + .ge(min_valid_volume_days) + ) + + mask = price_ok & liquidity_ok & history_ok & valid_volume_ok + mask = mask & pit_mask + return mask.astype(bool) diff --git a/tests/test_us_universe.py b/tests/test_us_universe.py new file mode 100644 index 0000000..983a789 --- /dev/null +++ b/tests/test_us_universe.py @@ -0,0 +1,213 @@ +import unittest +import warnings + +import pandas as pd + + +class BuildTradableMaskTests(unittest.TestCase): + def test_build_tradable_mask_uses_only_lagged_price_and_liquidity_inputs(self): + from research.us_universe import build_tradable_mask + + dates = pd.date_range("2024-01-01", periods=4, freq="D") + close = pd.DataFrame({"AAA": [4.0, 10.0, 10.0, 10.0]}, index=dates) + volume = pd.DataFrame({"AAA": [float("nan"), 200.0, 200.0, 200.0]}, index=dates) + + mask = build_tradable_mask( + close=close, + volume=volume, + pit_membership=None, + min_price=5.0, + min_dollar_volume=1000.0, + min_history_days=2, + min_valid_volume_days=2, + liquidity_window=2, + ) + + expected = pd.DataFrame({"AAA": [False, False, False, True]}, index=dates, dtype=bool) + pd.testing.assert_frame_equal(mask, expected) + + def test_build_tradable_mask_uses_only_lagged_history(self): + from research.us_universe import build_tradable_mask + + dates = pd.date_range("2024-01-01", periods=4, freq="D") + close = pd.DataFrame({"AAA": [10.0, float("nan"), 10.0, 10.0]}, index=dates) + volume = pd.DataFrame({"AAA": [200.0, 200.0, 200.0, 200.0]}, index=dates) + + mask = build_tradable_mask( + close=close, + volume=volume, + pit_membership=None, + min_price=5.0, + min_dollar_volume=1_000.0, + min_history_days=2, + min_valid_volume_days=1, + liquidity_window=1, + ) + + expected = pd.DataFrame({"AAA": [False, False, False, False]}, index=dates, dtype=bool) + pd.testing.assert_frame_equal(mask, expected) + + def test_build_tradable_mask_requires_membership_history_before_first_eligible_day(self): + from research.us_universe import build_tradable_mask + + dates = 
"""Unit tests for ``research.us_universe.build_tradable_mask``."""

import unittest
import warnings

import pandas as pd


# Thresholds shared by most tests; each test overrides only what it exercises.
_BASE_THRESHOLDS = {
    "min_price": 5.0,
    "min_dollar_volume": 1_000.0,
    "min_history_days": 1,
    "min_valid_volume_days": 1,
    "liquidity_window": 1,
}


def _daily_index(periods, start="2024-01-01"):
    """Return a daily ``DatetimeIndex`` of ``periods`` days from ``start``."""
    return pd.date_range(start, periods=periods, freq="D")


def _single_column(values, index):
    """Return a one-column frame named ``AAA`` holding ``values``."""
    return pd.DataFrame({"AAA": values}, index=index)


def _bool_frame(columns, index):
    """Return the boolean frame a test expects the mask to equal."""
    return pd.DataFrame(columns, index=index, dtype=bool)


class BuildTradableMaskTests(unittest.TestCase):
    """Behavioral tests for the lagged, PIT-aware tradable mask."""

    def test_build_tradable_mask_uses_only_lagged_price_and_liquidity_inputs(self):
        """First close is below min_price and first volume is NaN; only the last day passes."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(4)
        thresholds = {
            **_BASE_THRESHOLDS,
            "min_history_days": 2,
            "min_valid_volume_days": 2,
            "liquidity_window": 2,
        }

        result = build_tradable_mask(
            close=_single_column([4.0, 10.0, 10.0, 10.0], days),
            volume=_single_column([float("nan"), 200.0, 200.0, 200.0], days),
            pit_membership=None,
            **thresholds,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False, True]}, days),
        )

    def test_build_tradable_mask_uses_only_lagged_history(self):
        """A NaN close on the second day with a two-day history rule leaves every day False."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(4)
        thresholds = {**_BASE_THRESHOLDS, "min_history_days": 2}

        result = build_tradable_mask(
            close=_single_column([10.0, float("nan"), 10.0, 10.0], days),
            volume=_single_column([200.0, 200.0, 200.0, 200.0], days),
            pit_membership=None,
            **thresholds,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False, False]}, days),
        )

    def test_build_tradable_mask_requires_membership_history_before_first_eligible_day(self):
        """Membership starts on day three; the mask is expected True only on day four."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(4)
        membership = _single_column([False, False, True, True], days)

        result = build_tradable_mask(
            close=_single_column([10.0, 10.0, 10.0, 10.0], days),
            volume=_single_column([200.0, 200.0, 200.0, 200.0], days),
            pit_membership=membership,
            **_BASE_THRESHOLDS,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False, True]}, days),
        )

    def test_build_tradable_mask_aligns_pit_membership_without_truthy_carryover(self):
        """Membership with shifted dates and different columns aligns silently."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(3)
        prices = pd.DataFrame(
            {"AAA": [10.0] * 3, "BBB": [12.0] * 3},
            index=days,
        )
        volumes = pd.DataFrame(
            {"AAA": [1_000_000.0] * 3, "BBB": [1_000_000.0] * 3},
            index=days,
        )
        # Membership starts one day later than prices, omits AAA and adds CCC.
        membership = pd.DataFrame(
            {"BBB": [True, True, False], "CCC": [True, True, True]},
            index=_daily_index(3, start="2024-01-02"),
        )

        with warnings.catch_warnings(record=True) as captured:
            warnings.simplefilter("always")
            result = build_tradable_mask(
                close=prices,
                volume=volumes,
                pit_membership=membership,
                **_BASE_THRESHOLDS,
            )

        self.assertEqual(len(captured), 0)
        pd.testing.assert_frame_equal(
            result,
            _bool_frame(
                {"AAA": [False, False, False], "BBB": [False, False, True]},
                days,
            ),
        )

    def test_build_tradable_mask_treats_missing_membership_cells_as_false(self):
        """A ``pd.NA`` membership cell in a nullable boolean frame yields an all-False mask."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(3)
        membership = pd.DataFrame(
            {"AAA": [True, pd.NA, True]},
            index=days,
            dtype="boolean",
        )

        result = build_tradable_mask(
            close=_single_column([10.0, 10.0, 10.0], days),
            volume=_single_column([1_000_000.0, 1_000_000.0, 1_000_000.0], days),
            pit_membership=membership,
            **_BASE_THRESHOLDS,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False]}, days),
        )

    def test_build_tradable_mask_uses_strict_thresholds(self):
        """A close exactly equal to ``min_price`` never passes."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(3)

        result = build_tradable_mask(
            close=_single_column([5.0, 5.0, 5.0], days),
            volume=_single_column([300.0, 300.0, 300.0], days),
            pit_membership=None,
            **_BASE_THRESHOLDS,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False]}, days),
        )

    def test_build_tradable_mask_uses_strict_dollar_volume_threshold(self):
        """Dollar volume exactly equal to ``min_dollar_volume`` (8 * 125) never passes."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(3)

        result = build_tradable_mask(
            close=_single_column([8.0, 8.0, 8.0], days),
            volume=_single_column([125.0, 125.0, 125.0], days),
            pit_membership=None,
            **_BASE_THRESHOLDS,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False]}, days),
        )

    def test_build_tradable_mask_requires_valid_dollar_volume_history(self):
        """With a NaN close on day two and two valid volume days required, no day passes."""
        from research.us_universe import build_tradable_mask

        days = _daily_index(4)
        thresholds = {
            **_BASE_THRESHOLDS,
            "min_valid_volume_days": 2,
            "liquidity_window": 2,
        }

        result = build_tradable_mask(
            close=_single_column([10.0, float("nan"), 10.0, 10.0], days),
            volume=_single_column([200.0, 200.0, 200.0, 200.0], days),
            pit_membership=None,
            **thresholds,
        )

        pd.testing.assert_frame_equal(
            result,
            _bool_frame({"AAA": [False, False, False, False]}, days),
        )


if __name__ == "__main__":
    unittest.main()