diff --git a/data/sp500_history.json b/data/sp500_history.json new file mode 100644 index 0000000..f2e4c0a --- /dev/null +++ b/data/sp500_history.json @@ -0,0 +1 @@ +{"date": "2026-04-17", "intervals": {"TXT": [[null, null]], "EA": [[null, null]], "PSX": [["2012-04-23", null]], "SOLV": [["2024-04-01", null]], "NWSA": [["2013-07-01", null]], "META": [[null, null]], "LIN": [[null, null]], "PFG": [[null, null]], "KKR": [["2024-06-24", null]], "VRSN": [[null, null]], "ACN": [["2011-07-05", null]], "EL": [[null, null]], "TJX": [[null, null]], "WBD": [["2022-04-11", null]], "SWKS": [["2015-03-12", null]], "VLTO": [["2023-10-02", null]], "TT": [[null, null]], "LVS": [["2019-10-03", null]], "PYPL": [["2015-07-20", null]], "IBM": [[null, null]], "CMS": [[null, null]], "HIG": [[null, null]], "PG": [[null, null]], "TSN": [[null, null]], "LYV": [["2019-12-23", null]], "ADSK": [[null, null]], "T": [[null, null]], "TDG": [["2016-06-03", null]], "TTD": [["2025-07-18", null]], "CHTR": [["2016-09-08", null]], "SPGI": [[null, null]], "SPG": [[null, null]], "GEHC": [["2023-01-04", null]], "NWS": [["2015-09-18", null]], "AVY": [[null, null]], "AME": [["2013-09-20", null]], "MCO": [[null, null]], "EFX": [[null, null]], "FOX": [["2015-09-18", "2019-03-19"], ["2019-03-19", null]], "SWK": [[null, null]], "UHS": [["2014-09-20", null]], "MAS": [[null, null]], "CL": [[null, null]], "RCL": [["2014-12-05", null]], "JBHT": [["2015-07-01", null]], "WYNN": [[null, null]], "HD": [[null, null]], "IBKR": [["2025-08-28", null]], "CAT": [[null, null]], "EXR": [["2016-01-19", null]], "ZBH": [[null, null]], "ETN": [[null, null]], "CTRA": [[null, null]], "PEP": [[null, null]], "LOW": [[null, null]], "DAL": [["2013-09-10", null]], "FANG": [["2018-12-03", null]], "BKNG": [[null, null]], "CF": [[null, null]], "AIG": [[null, null]], "LEN": [[null, null]], "IDXX": [["2017-01-05", null]], "AZO": [[null, null]], "DD": [[null, "2017-09-01"], ["2019-06-03", null]], "APTV": [[null, null]], "C": [[null, null]], 
"MRSH": [[null, null]], "SYY": [[null, null]], "ORLY": [[null, null]], "RMD": [["2017-07-26", null]], "DHI": [[null, null]], "NSC": [[null, null]], "ADBE": [[null, null]], "UAL": [["2015-09-02", null]], "CCI": [["2012-03-13", null]], "BR": [["2018-06-18", null]], "COP": [[null, null]], "GLW": [[null, null]], "MAA": [["2016-12-02", null]], "CNC": [["2016-03-30", null]], "COR": [[null, null]], "FRT": [["2016-02-01", null]], "AIZ": [[null, null]], "VST": [["2024-05-08", null]], "SATS": [["2026-03-23", null]], "AMAT": [[null, null]], "SNPS": [["2017-03-16", null]], "SNA": [[null, null]], "GEV": [["2024-04-02", null]], "AJG": [["2016-05-31", null]], "HAS": [[null, null]], "BEN": [[null, null]], "INCY": [["2017-02-28", null]], "BAX": [[null, null]], "TTWO": [["2018-03-19", null]], "CAH": [[null, null]], "FITB": [[null, null]], "ARE": [["2017-03-20", null]], "FDX": [[null, null]], "AMGN": [[null, null]], "TAP": [[null, null]], "VRSK": [["2015-10-07", null]], "COST": [[null, null]], "ROK": [[null, null]], "TSLA": [["2020-12-21", null]], "ALLE": [["2013-12-02", null]], "YUM": [[null, null]], "NOW": [["2019-11-21", null]], "AMP": [[null, null]], "SBUX": [["2000-06-07", null]], "PCAR": [[null, null]], "TSCO": [["2014-01-24", null]], "WM": [[null, null]], "AON": [[null, null]], "DOW": [[null, "2017-09-01"], ["2019-04-02", null]], "NUE": [[null, null]], "GPN": [["2016-04-25", null]], "GPC": [[null, null]], "XYZ": [["2025-07-23", null]], "ECL": [[null, null]], "EQT": [[null, "2018-11-13"], ["2022-10-03", null]], "WEC": [[null, null]], "PPL": [[null, null]], "DOV": [[null, null]], "WAT": [[null, null]], "ABT": [[null, null]], "TMO": [[null, null]], "TFC": [[null, null]], "J": [[null, null]], "IQV": [[null, null]], "EXPE": [["2007-10-02", null]], "AMCR": [["2019-06-11", null]], "MAR": [[null, null]], "DXCM": [["2020-05-12", null]], "URI": [["2014-09-20", null]], "COIN": [["2025-05-19", null]], "BRK-B": [[null, null]], "BMY": [[null, null]], "WST": [["2020-05-22", null]], "EXE": 
[["2025-03-24", null]], "Q": [[null, "2011-03-31"], ["2025-11-03", null]], "CRWD": [["2024-06-24", null]], "CDW": [["2019-09-23", null]], "KLAC": [[null, null]], "NOC": [[null, null]], "HLT": [["2017-06-19", null]], "CRH": [["2025-12-22", null]], "ERIE": [["2024-09-23", null]], "AXON": [["2023-05-04", null]], "HSY": [[null, null]], "AMD": [[null, "2013-09-20"], ["2017-03-20", null]], "LLY": [[null, null]], "UBER": [["2023-12-18", null]], "PNW": [[null, null]], "WDC": [[null, null]], "CHD": [["2015-12-29", null]], "WTW": [[null, null]], "EXPD": [[null, null]], "CDNS": [["2017-09-18", null]], "CRM": [["2008-09-12", null]], "REGN": [["2013-04-30", null]], "IRM": [[null, null]], "ABNB": [["2023-09-18", null]], "DPZ": [["2020-05-12", null]], "OKE": [[null, null]], "MU": [[null, null]], "SW": [[null, null]], "VRT": [["2026-03-23", null]], "HSIC": [["2015-03-18", null]], "AVB": [["2007-01-10", null]], "STX": [["2012-07-02", null]], "MPC": [["2011-06-30", null]], "WAB": [["2019-02-27", null]], "MRNA": [["2021-07-21", null]], "JPM": [[null, null]], "CLX": [[null, null]], "HCA": [["2015-01-27", null]], "PH": [[null, null]], "DOC": [[null, null]], "DGX": [[null, null]], "IEX": [["2019-08-09", null]], "TGT": [[null, null]], "COF": [[null, null]], "ABBV": [["2013-01-02", null]], "PAYX": [[null, null]], "HST": [[null, null]], "QCOM": [[null, null]], "IT": [["2017-04-05", null]], "GOOG": [[null, null]], "MDT": [[null, null]], "LNT": [["2016-07-01", null]], "NRG": [[null, null]], "NEM": [[null, null]], "VMC": [[null, null]], "NEE": [[null, null]], "PKG": [["2017-07-26", null]], "KEY": [[null, null]], "DRI": [[null, null]], "AOS": [["2017-07-26", null]], "CSGP": [["2022-09-19", null]], "WELL": [[null, null]], "TMUS": [["2019-07-15", null]], "EW": [["2011-03-31", null]], "ES": [[null, null]], "GM": [["2013-06-06", null]], "CIEN": [[null, "2009-12-18"], ["2026-02-09", null]], "INTC": [[null, null]], "FFIV": [["2010-12-17", null]], "PODD": [["2023-03-15", null]], "FE": [[null, null]], 
"STLD": [["2022-12-22", null]], "XOM": [[null, null]], "ANET": [["2018-08-28", null]], "DTE": [[null, null]], "FDS": [["2021-12-20", null]], "EME": [["2025-09-22", null]], "AMZN": [["2005-11-18", null]], "MOS": [["2011-09-23", null]], "JKHY": [["2018-11-13", null]], "ACGL": [["2022-11-01", null]], "NDAQ": [[null, null]], "MTB": [[null, null]], "APA": [[null, null]], "CINF": [[null, null]], "COHR": [["2026-03-23", null]], "NTAP": [[null, null]], "VLO": [[null, null]], "HUBB": [["2023-10-18", null]], "EQR": [[null, null]], "PNR": [["2012-10-01", null]], "XEL": [[null, null]], "CBRE": [[null, null]], "BAC": [[null, null]], "SNDK": [[null, "2016-05-13"], ["2025-11-28", null]], "STT": [[null, null]], "SRE": [[null, null]], "RJF": [["2017-03-20", null]], "FOXA": [[null, "2019-03-19"], ["2019-03-19", null]], "COO": [["2016-09-22", null]], "PLTR": [["2024-09-23", null]], "HWM": [["2020-04-01", null]], "FSLR": [[null, "2017-03-20"], ["2022-12-19", null]], "MKC": [[null, null]], "CTAS": [[null, null]], "AES": [[null, null]], "ROL": [["2018-10-01", null]], "PEG": [[null, null]], "STZ": [["2005-07-01", null]], "BKR": [["2017-07-07", null]], "GNRC": [["2021-03-22", null]], "WMT": [[null, null]], "DVA": [[null, null]], "BALL": [[null, null]], "DLTR": [["2011-12-16", null]], "HPE": [["2015-11-02", null]], "BDX": [[null, null]], "TROW": [[null, null]], "LHX": [[null, null]], "CVS": [[null, null]], "BG": [["2023-03-15", null]], "CEG": [[null, "2012-03-13"], ["2022-02-02", null]], "NDSN": [["2022-02-15", null]], "CBOE": [["2017-03-01", null]], "EMR": [[null, null]], "MSFT": [[null, null]], "BX": [["2023-09-18", null]], "PM": [[null, null]], "A": [[null, null]], "PRU": [[null, null]], "WY": [[null, null]], "WFC": [[null, null]], "IFF": [[null, null]], "UPS": [[null, null]], "BRO": [["2021-09-20", null]], "TEL": [[null, "2009-06-25"], ["2011-10-14", null]], "PPG": [[null, null]], "MGM": [["2017-07-26", null]], "PTC": [["2021-04-20", null]], "PHM": [[null, null]], "CSCO": [[null, 
null]], "NTRS": [[null, null]], "ELV": [[null, null]], "GEN": [[null, null]], "MSCI": [["2018-04-04", null]], "KO": [[null, null]], "ISRG": [[null, null]], "SMCI": [["2024-03-18", null]], "FCX": [[null, null]], "ULTA": [["2016-04-18", null]], "BF-B": [[null, null]], "VTR": [["2009-03-03", null]], "CMG": [["2011-04-27", null]], "LITE": [["2026-03-23", null]], "TKO": [["2025-03-24", null]], "CTSH": [[null, null]], "ADP": [[null, null]], "BSX": [[null, null]], "FISV": [[null, null]], "ED": [[null, null]], "JNJ": [[null, null]], "WSM": [["2025-03-24", null]], "NXPI": [["2021-03-22", null]], "TER": [[null, "2013-12-23"], ["2020-09-21", null]], "LDOS": [["2019-08-09", null]], "CB": [["2010-07-14", null]], "MMM": [[null, null]], "BBY": [[null, null]], "DELL": [[null, "2013-10-29"], ["2024-09-23", null]], "ARES": [["2025-12-11", null]], "ADM": [[null, null]], "MCD": [[null, null]], "OTIS": [["2020-04-03", null]], "CPAY": [[null, null]], "IP": [[null, null]], "AVGO": [["2014-05-08", null]], "GS": [[null, null]], "KIM": [[null, null]], "NKE": [[null, null]], "BIIB": [[null, null]], "GL": [[null, null]], "FIX": [["2025-12-22", null]], "V": [["2009-12-18", null]], "EXC": [[null, null]], "DLR": [["2016-05-18", null]], "SHW": [[null, null]], "EVRG": [["2018-06-05", null]], "KMB": [[null, null]], "MS": [[null, null]], "VICI": [["2022-06-08", null]], "LH": [[null, null]], "CPB": [[null, null]], "DG": [["2012-12-03", null]], "PSKY": [[null, null]], "AEE": [[null, null]], "TDY": [["2020-06-22", null]], "GD": [[null, null]], "PSA": [[null, null]], "HOOD": [["2025-09-22", null]], "UNP": [[null, null]], "ALB": [["2016-07-01", null]], "CSX": [[null, null]], "NI": [[null, null]], "CPRT": [["2018-07-02", null]], "HRL": [["2009-03-03", null]], "DASH": [["2025-03-24", null]], "RF": [[null, null]], "JCI": [[null, null]], "O": [["2015-04-07", null]], "KR": [[null, null]], "CPT": [["2022-04-04", null]], "ROST": [["2009-12-18", null]], "TECH": [["2021-08-30", null]], "UDR": [["2016-03-07", 
null]], "ROP": [[null, null]], "MO": [[null, null]], "IVZ": [[null, null]], "GDDY": [["2024-06-24", null]], "TPR": [[null, null]], "TRGP": [["2022-10-12", null]], "DVN": [[null, null]], "PFE": [[null, null]], "MCHP": [[null, null]], "FICO": [["2023-03-20", null]], "TXN": [[null, null]], "CTVA": [["2019-06-03", null]], "DECK": [["2024-03-18", null]], "PCG": [[null, "2019-01-18"], ["2022-10-03", null]], "AKAM": [[null, null]], "ATO": [["2019-02-15", null]], "REG": [["2017-03-02", null]], "APP": [["2025-09-22", null]], "CMCSA": [[null, null]], "INTU": [["2000-12-05", null]], "ADI": [[null, null]], "SLB": [[null, null]], "BXP": [[null, null]], "SYF": [["2015-11-18", null]], "GIS": [[null, null]], "KDP": [["2022-06-21", null]], "STE": [["2019-12-23", null]], "ODFL": [["2019-12-09", null]], "AEP": [[null, null]], "DDOG": [["2025-07-09", null]], "PLD": [[null, null]], "HII": [["2018-01-03", null]], "GILD": [[null, null]], "WMB": [[null, null]], "D": [[null, null]], "SBAC": [["2017-09-01", null]], "OMC": [[null, null]], "ETR": [[null, null]], "LRCX": [["2012-06-05", null]], "EG": [[null, null]], "HBAN": [[null, null]], "OXY": [[null, null]], "F": [[null, null]], "UNH": [[null, null]], "DE": [[null, null]], "TPL": [["2024-11-26", null]], "MET": [[null, null]], "SO": [[null, null]], "XYL": [["2011-10-31", null]], "AWK": [["2016-03-04", null]], "BA": [[null, null]], "TRMB": [["2021-01-21", null]], "KMI": [["2012-05-17", null]], "MDLZ": [["2012-10-02", null]], "RL": [[null, null]], "MRK": [[null, null]], "LYB": [["2012-09-05", null]], "HPQ": [[null, null]], "HUM": [[null, null]], "MCK": [[null, null]], "APO": [["2024-12-23", null]], "WRB": [["2019-12-05", null]], "DHR": [[null, null]], "DIS": [["1976-07-01", null]], "NVDA": [[null, null]], "EIX": [[null, null]], "ALGN": [["2017-06-19", null]], "LMT": [[null, null]], "MPWR": [["2021-02-12", null]], "ICE": [["2007-09-26", null]], "CI": [[null, null]], "PGR": [[null, null]], "ZBRA": [["2019-12-23", null]], "ORCL": [[null, null]], 
"NCLH": [["2017-10-13", null]], "GRMN": [["2012-12-11", null]], "LII": [["2024-12-23", null]], "CCL": [["1998-12-11", null]], "RVTY": [[null, null]], "MNST": [["2012-06-29", null]], "ITW": [[null, null]], "VZ": [[null, null]], "CHRW": [[null, null]], "NFLX": [["2010-12-17", null]], "APD": [[null, null]], "IR": [["2020-03-02", null]], "MTD": [["2016-09-06", null]], "PANW": [["2023-06-20", null]], "NVR": [["2019-09-26", null]], "EQIX": [["2015-03-23", null]], "JBL": [[null, "2014-11-05"], ["2023-12-18", null]], "RTX": [[null, null]], "SJM": [[null, null]], "EPAM": [["2021-12-14", null]], "ZTS": [["2013-06-21", null]], "APH": [[null, null]], "EBAY": [[null, null]], "AMT": [[null, null]], "FTV": [["2016-07-05", null]], "GE": [[null, null]], "RSG": [[null, null]], "CMI": [[null, null]], "KEYS": [["2018-11-06", null]], "AFL": [[null, null]], "CME": [[null, null]], "AXP": [[null, null]], "FTNT": [["2018-10-11", null]], "AAPL": [[null, null]], "CAG": [[null, null]], "FAST": [["2008-09-12", null]], "HON": [[null, null]], "VRTX": [["2013-09-20", null]], "MA": [[null, null]], "CVX": [[null, null]], "ALL": [[null, null]], "POOL": [["2020-10-07", null]], "TYL": [["2020-06-22", null]], "LULU": [["2023-10-18", null]], "PNC": [[null, null]], "CVNA": [["2025-12-22", null]], "KHC": [["2015-07-06", null]], "SCHW": [[null, null]], "HAL": [[null, null]], "ESS": [["2014-04-02", null]], "CNP": [[null, null]], "MLM": [["2014-07-02", null]], "TRV": [[null, null]], "DUK": [[null, null]], "KVUE": [["2023-08-25", null]], "PWR": [[null, null]], "GWW": [[null, null]], "CASY": [["2026-04-09", null]], "SYK": [[null, null]], "WDAY": [["2024-12-23", null]], "BK": [[null, null]], "EOG": [[null, null]], "MSI": [[null, null]], "BLDR": [["2023-12-18", null]], "USB": [[null, null]], "CFG": [["2016-02-01", null]], "CRL": [["2021-05-14", null]], "FIS": [[null, null]], "ON": [["2022-06-21", null]], "GOOGL": [["2014-04-03", null]], "INVH": [["2022-09-19", null]], "CARR": [["2020-04-03", null]], "BLK": 
[["2011-04-01", null]], "VTRS": [[null, null]], "LUV": [[null, null]], "L": [[null, null]], "HOLX": [["2016-03-30", "2026-04-09"]], "LW": [["2018-12-03", "2026-03-23"]], "PAYC": [["2020-01-28", "2026-03-23"]], "MTCH": [["2021-09-20", "2026-03-23"]], "MOH": [["2022-03-02", "2026-03-23"]], "DAY": [[null, "2026-02-09"]], "LKQ": [["2016-05-23", "2025-12-22"]], "MHK": [["2013-12-23", "2025-12-22"]], "SOLS": [["2025-10-30", "2025-12-22"]], "K": [[null, "2025-12-11"]], "IPG": [[null, "2025-11-28"]], "EMN": [[null, "2025-11-04"]], "KMX": [["2010-06-28", "2025-10-31"]], "CZR": [["2021-03-22", "2025-09-22"]], "MKTX": [["2019-07-01", "2025-09-22"]], "ENPH": [["2021-01-07", "2025-09-22"]], "WBA": [[null, "2025-08-28"]], "HES": [[null, "2025-07-23"]], "ANSS": [["2017-06-19", "2025-07-18"]], "JNPR": [["2006-06-02", "2025-07-09"]], "DFS": [["2007-07-02", "2025-05-19"]], "BWA": [["2011-12-16", "2025-03-24"]], "TFX": [["2019-01-18", "2025-03-24"]], "CE": [["2018-12-24", "2025-03-24"]], "FMC": [["2009-08-19", "2025-03-24"]], "CTLT": [["2020-09-21", "2024-12-23"]], "AMTM": [["2024-09-30", "2024-12-23"]], "QRVO": [["2015-06-11", "2024-12-23"]], "MRO": [[null, "2024-11-26"]], "BBWI": [[null, "2024-10-01"]], "AAL": [["2015-03-23", "2024-09-23"]], "ETSY": [["2020-09-21", "2024-09-23"]], "BIO": [["2020-06-22", "2024-09-23"]], "RHI": [[null, "2024-06-24"]], "CMA": [[null, "2024-06-24"]], "ILMN": [["2015-11-19", "2024-06-24"]], "PXD": [[null, "2024-05-08"]], "XRAY": [[null, "2024-04-03"]], "VFC": [[null, "2024-04-03"]], "WHR": [[null, "2024-03-18"]], "ZION": [[null, "2024-03-18"]], "SEE": [[null, "2023-12-18"]], "ALK": [["2016-05-13", "2023-12-18"]], "SEDG": [["2021-12-20", "2023-12-18"]], "ATVI": [["2015-08-28", "2023-10-18"]], "OGN": [["2021-06-03", "2023-10-18"]], "DXC": [["2017-04-04", "2023-10-03"]], "LNC": [[null, "2023-09-18"]], "NWL": [[null, "2023-09-18"]], "AAP": [["2015-07-08", "2023-08-25"]], "DISH": [["2017-03-13", "2023-06-20"]], "FRC": [["2019-01-02", "2023-05-04"]], "LUMN": 
[[null, "2023-03-20"]], "SBNY": [["2021-12-20", "2023-03-15"]], "SIVB": [["2018-03-19", "2023-03-15"]], "VNO": [[null, "2023-01-05"]], "ABMD": [["2018-05-31", "2022-12-22"]], "FBHS": [["2016-06-22", "2022-12-19"]], "MBC": [["2022-12-15", "2022-12-19"]], "TWTR": [["2018-06-07", "2022-11-01"]], "NLSN": [["2013-07-08", "2022-10-12"]], "CTXS": [[null, "2022-10-03"]], "DRE": [["2017-07-26", "2022-10-03"]], "PVH": [["2013-02-15", "2022-09-19"]], "PENN": [["2021-03-22", "2022-09-19"]], "UAA": [[null, "2022-06-21"]], "IPGP": [["2018-03-07", "2022-06-21"]], "UA": [["2016-04-08", "2022-06-21"]], "CERN": [["2010-04-29", "2022-06-08"]], "DISCA": [[null, "2022-04-11"]], "DISCK": [["2014-08-06", "2022-04-11"]], "PBCT": [[null, "2022-04-04"]], "INFO": [["2017-06-02", "2022-03-02"]], "XLNX": [[null, "2022-02-15"]], "GPS": [[null, "2022-02-03"]], "LEG": [[null, "2021-12-20"]], "HBI": [["2015-03-23", "2021-12-20"]], "WU": [[null, "2021-12-20"]], "KSU": [["2013-05-23", "2021-12-14"]], "PRGO": [["2011-12-16", "2021-09-20"]], "UNM": [[null, "2021-09-20"]], "NOV": [[null, "2021-09-20"]], "MXIM": [[null, "2007-09-27"], ["2018-12-03", "2021-08-30"]], "ALXN": [["2012-05-21", "2021-07-21"]], "HFC": [["2018-06-18", "2021-06-04"]], "FLIR": [[null, "2021-05-14"]], "VAR": [[null, "2021-04-20"]], "VNT": [["2020-10-09", "2021-03-22"]], "XRX": [[null, "2021-03-22"]], "SLG": [["2015-03-23", "2021-03-22"]], "FLS": [[null, "2021-03-22"]], "FTI": [["2009-06-05", "2021-02-12"]], "CXO": [["2016-02-22", "2021-01-21"]], "TIF": [[null, "2021-01-07"]], "AIV": [[null, "2020-12-21"]], "NBL": [[null, "2020-10-12"]], "ETFC": [[null, "2020-10-07"]], "HRB": [[null, "2020-09-21"]], "COTY": [["2016-09-30", "2020-09-21"]], "KSS": [[null, "2020-09-21"]], "JWN": [[null, "2020-06-22"]], "ADS": [["2013-12-23", "2020-06-22"]], "HOG": [[null, "2020-06-22"]], "HP": [["2010-02-26", "2020-05-22"]], "CPRI": [[null, "2020-05-12"]], "AGN": [[null, "2020-05-12"]], "M": [[null, "2020-04-06"]], "RTN": [[null, "2020-04-06"]], 
"ARNC": [["2016-11-01", "2020-04-01"]], "XEC": [["2014-06-20", "2020-03-02"]], "WCG": [["2018-09-14", "2020-01-28"]], "MAC": [["2013-05-08", "2019-12-23"]], "AMG": [["2014-07-01", "2019-12-23"]], "TRIP": [["2011-12-20", "2019-12-23"]], "STI": [[null, "2019-12-09"]], "VIAB": [[null, "2019-12-05"]], "CELG": [[null, "2019-11-21"]], "NKTR": [["2018-03-19", "2019-10-03"]], "JEF": [[null, "2019-09-26"]], "TSS": [[null, "2019-09-23"]], "APC": [[null, "2019-08-09"]], "FL": [["2016-04-04", "2019-08-09"]], "RHT": [[null, "2019-07-15"]], "LLL": [[null, "2019-07-01"]], "BMS": [[null, "2014-12-05"], ["2019-06-07", "2019-06-11"]], "MAT": [[null, "2019-06-07"]], "DWDP": [["2017-09-01", "2019-06-03"]], "FLR": [[null, "2019-06-03"]], "BHF": [["2017-08-08", "2019-04-02"]], "GT": [[null, "2019-02-27"]], "NFX": [["2010-12-17", "2019-02-15"]], "SCG": [[null, "2019-01-02"]], "ESRX": [["2003-09-25", "2018-12-24"]], "COL": [[null, "2018-12-03"]], "SRCL": [[null, "2018-12-03"]], "AET": [[null, "2018-12-03"]], "CA": [[null, "2018-11-06"]], "EVHC": [["2016-12-02", "2018-10-11"]], "ANDV": [[null, "2018-10-01"]], "XL": [[null, "2018-09-14"]], "GGP": [["2013-12-10", "2018-08-28"]], "DPS": [[null, "2018-07-02"]], "TWX": [[null, "2018-06-20"]], "AYI": [["2016-05-03", "2018-06-18"]], "RRC": [["2007-12-20", "2018-06-18"]], "MON": [[null, "2018-06-07"]], "NAVI": [["2014-05-01", "2018-06-05"]], "WYN": [[null, "2018-05-31"]], "CSRA": [["2015-12-01", "2018-04-04"]], "SIG": [["2015-07-29", "2018-03-19"]], "PDCO": [[null, "2018-03-19"]], "CHK": [[null, "2018-03-19"]], "SNI": [[null, "2018-03-07"]], "BCR": [[null, "2018-01-03"]], "LVLT": [["2014-11-05", "2017-10-13"]], "SPLS": [[null, "2017-09-18"]], "WFM": [[null, "2017-08-29"]], "AN": [[null, "2017-08-08"]], "RIG": [["2013-10-29", "2017-07-26"]], "BBBY": [[null, "2017-07-26"]], "MUR": [[null, "2017-07-26"]], "MNK": [["2014-08-18", "2017-07-26"]], "RAI": [[null, "2017-07-26"]], "BHI": [[null, "2017-07-07"]], "R": [[null, "2017-06-19"]], "MJN": 
[["2009-12-18", "2017-06-19"]], "TDC": [["2007-10-01", "2017-06-19"]], "YHOO": [["1999-12-08", "2017-06-19"]], "TGNA": [[null, "2017-06-02"]], "DNB": [[null, "2017-04-05"]], "SWN": [[null, "2017-04-04"]], "URBN": [[null, "2017-03-20"]], "FTR": [[null, "2017-03-20"]], "HAR": [[null, "2017-03-16"]], "LLTC": [[null, "2017-03-13"]], "ENDP": [["2015-01-27", "2017-03-02"]], "PBI": [[null, "2017-03-01"]], "SE": [[null, "2017-02-28"]], "STJ": [[null, "2017-01-05"]], "OI": [[null, "2000-12-05"], ["2008-12-31", "2016-12-02"]], "LM": [[null, "2016-12-02"]], "AA": [[null, "2016-11-01"]], "DO": [[null, "2016-09-30"]], "HOT": [[null, "2016-09-22"]], "EMC": [[null, "2016-09-08"]], "TYC": [["2010-08-26", "2016-09-06"]], "CPGX": [["2015-07-02", "2016-07-05"]], "GAS": [[null, "2011-12-12"], ["2011-12-12", "2016-07-01"]], "TE": [[null, "2016-07-01"]], "CVC": [["2010-12-17", "2016-06-22"]], "BXLT": [["2015-07-01", "2016-06-03"]], "CCE": [[null, "2016-05-31"]], "ARG": [["2009-09-28", "2016-05-23"]], "TWC": [[null, "2016-05-18"]], "ADT": [["2012-10-01", "2016-05-03"]], "GME": [["2007-12-13", "2016-04-25"]], "THC": [[null, "2016-04-18"]], "CAM": [[null, "2016-04-04"]], "POM": [[null, "2016-03-30"]], "ESV": [["2012-07-31", "2016-03-30"]], "GMCR": [["2014-03-21", "2016-03-07"]], "CNX": [[null, "2016-03-04"]], "PCL": [[null, "2016-02-22"]], "PCP": [[null, "2016-02-01"]], "BRCM": [[null, "2016-02-01"]], "ACE": [[null, "2016-01-19"]], "FOSL": [["2012-04-03", "2016-01-05"]], "ALTR": [[null, "2015-12-29"]], "CMCSK": [["2015-09-18", "2015-12-15"]], "CSC": [[null, "2015-12-01"]], "SIAL": [[null, "2015-11-19"]], "GNW": [[null, "2015-11-18"]], "HCBK": [[null, "2015-11-02"]], "JOY": [[null, "2015-10-07"]], "HSP": [[null, "2015-09-02"]], "PLL": [[null, "2015-08-28"]], "DTV": [[null, "2015-07-29"]], "NE": [[null, "2015-07-20"]], "FDO": [[null, "2015-07-08"]], "KRFT": [["2012-10-02", "2015-07-06"]], "ATI": [[null, "2015-07-02"]], "QEP": [["2010-06-30", "2015-07-01"]], "TEG": [[null, "2015-07-01"]], 
"LO": [["2008-06-10", "2015-06-11"]], "WIN": [[null, "2015-04-07"]], "DNR": [[null, "2015-03-23"]], "NBR": [[null, "2015-03-23"]], "AVP": [[null, "2015-03-23"]], "CFN": [[null, "2015-03-18"]], "PETM": [["2012-10-10", "2015-03-12"]], "SWY": [[null, "2015-01-27"]], "COV": [[null, "2009-06-05"], ["2011-02-28", "2015-01-27"]], "BTU": [[null, "2014-09-20"]], "GHC": [[null, "2014-09-20"]], "RDC": [[null, "2014-08-18"]], "X": [[null, "2014-07-02"]], "FRX": [[null, "2014-07-01"]], "IGT": [[null, "2014-06-20"]], "LSI": [[null, "2014-05-08"]], "BEAM": [[null, "2014-05-01"]], "SLM": [[null, "2014-05-01"]], "CLF": [["2009-12-18", "2014-04-02"]], "WPX": [["2011-12-31", "2014-03-21"]], "LIFE": [[null, "2014-01-24"]], "ANF": [[null, "2013-12-23"]], "JDSU": [["2000-07-27", "2013-12-23"]], "MOLX": [[null, "2013-12-10"]], "JCP": [[null, "2013-12-02"]], "NYX": [[null, "2013-11-13"]], "SAI": [["2009-12-18", "2013-09-20"]], "BMC": [[null, "2013-09-10"]], "S": [[null, "2013-07-08"]], "APOL": [[null, "2013-07-01"]], "FHN": [[null, "2013-06-21"]], "HNZ": [[null, "2013-06-06"]], "DF": [[null, "2013-05-23"]], "CVH": [[null, "2013-05-08"]], "PCS": [["2009-06-29", "2013-04-30"]], "BIG": [[null, "2013-02-15"]], "FII": [[null, "2013-01-02"]], "TIE": [[null, "2012-12-21"]], "RRD": [[null, "2012-12-11"]], "CBE": [[null, "2009-09-28"], ["2011-11-18", "2012-12-03"]], "SUN": [[null, "2012-10-10"]], "ANR": [["2011-06-01", "2012-10-02"]], "KFT": [["2007-03-30", "2012-10-02"]], "LXK": [[null, "2012-10-01"]], "DV": [[null, "2012-10-01"]], "SHLD": [[null, "2012-09-05"]], "GR": [[null, "2012-07-31"]], "PGN": [[null, "2012-07-02"]], "SLE": [[null, "2012-06-29"]], "NVLS": [[null, "2012-06-05"]], "MMI": [["2011-01-03", "2012-05-21"]], "EP": [[null, "2012-05-17"]], "SVU": [[null, "2012-04-23"]], "MHS": [[null, "2012-04-03"]], "CPWR": [["1998-12-11", "2011-12-31"]], "TLAB": [[null, "2011-12-20"]], "AKS": [["2008-07-01", "2011-12-16"]], "MWW": [[null, "2011-12-16"]], "WFR": [[null, "2011-12-16"]], "JNS": 
[[null, "2011-11-18"]], "ITT": [[null, "2011-10-31"]], "CEPH": [[null, "2011-10-14"]], "NSM": [[null, "2011-09-23"]], "MI": [[null, "2011-07-05"]], "RSH": [[null, "2011-06-30"]], "MEE": [["2008-06-23", "2011-06-01"]], "NOVL": [[null, "2011-04-27"]], "GENZ": [[null, "2011-04-01"]], "MFE": [[null, "2011-02-28"]], "AYE": [[null, "1976-07-01"], ["2000-12-05", "2011-02-25"]], "MDP": [[null, "2011-01-03"]], "NYT": [[null, "2010-12-17"]], "ODP": [[null, "2010-12-17"]], "EK": [[null, "2010-12-17"]], "KG": [[null, "2010-12-17"]], "PTV": [[null, "2010-11-17"]], "SII": [[null, "2010-08-26"]], "MIL": [[null, "2010-07-14"]], "STR": [[null, "2010-06-30"]], "XTO": [[null, "2010-06-28"]], "BJS": [[null, "2010-04-29"]], "RX": [[null, "2010-02-26"]], "KBH": [[null, "2009-12-18"]], "CVG": [["2000-06-12", "2009-12-18"]], "MBI": [[null, "2009-12-18"]], "DYN": [[null, "2009-12-18"]], "SGP": [[null, "2009-11-03"]], "CTX": [[null, "2009-08-19"]], "ACAS": [[null, "2009-03-03"]], "JNY": [[null, "2009-03-03"]], "WB": [[null, "2008-12-31"]], "LEH": [[null, "2008-09-16"]], "FNM": [[null, "2008-09-12"]], "FRE": [[null, "2008-09-12"]], "CFC": [[null, "2008-07-01"]], "BC": [[null, "2008-06-23"]], "OMX": [[null, "2008-06-23"]], "ABK": [["2000-12-05", "2008-06-10"]], "TRB": [[null, "2007-12-20"]], "DJ": [[null, "2007-12-13"]], "AV": [[null, "2007-10-26"]], "SLR": [[null, "2007-10-02"]], "NCR": [[null, "2007-10-01"]], "FDC": [[null, "2007-09-26"]], "KSE": [[null, "2007-08-24"]], "ADCT": [[null, "2007-07-02"]], "TSG": [[null, "2007-03-30"]], "SBL": [["2000-12-05", "2007-01-10"]], "ABS": [[null, "2006-06-02"]], "GLK": [[null, "2005-07-01"]], "QTRN": [[null, "2003-09-25"]], "BS": [[null, "2000-12-05"]], "GRA": [[null, "2000-12-05"]], "CCK": [[null, "2000-12-05"]], "RAD": [[null, "2000-07-27"]], "TMC": [[null, "2000-06-12"]], "SMS": [[null, "2000-06-07"]], "LDW": [[null, "1999-12-08"]], "HPH": [[null, "1999-06-09"]], "GRN": [[null, "1998-12-11"]], "USL": [[null, "1997-06-17"]], "HNG": [[null, 
"1976-07-01"]]}} \ No newline at end of file diff --git a/research/__init__.py b/research/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/research/fetch_historical.py b/research/fetch_historical.py new file mode 100644 index 0000000..12c2940 --- /dev/null +++ b/research/fetch_historical.py @@ -0,0 +1,105 @@ +""" +Fetch price history for all tickers that were ever S&P 500 members — including +delisted ones — and save to data/us_pit.csv. This is the foundation for a +survivorship-bias-free backtest. + +NOTE: Yahoo Finance no longer serves price data for many fully-delisted tickers +(bankruptcies, old mergers). Those are silently skipped. The result is still +a major improvement over "today's S&P 500 extrapolated 10 years back", but it +is NOT a perfect point-in-time dataset — only a dataset where the universe +mask is correct at each date. A subset of worst-outcome tickers (e.g., ABK, +ACAS) will be missing entirely. This caveat is documented in the run summary. +""" + +import os +from datetime import datetime, timedelta + +import pandas as pd +import yfinance as yf + +import universe_history as uh + +DATA_DIR = "data" +OUT_PATH = os.path.join(DATA_DIR, "us_pit.csv") +YEARS = 10 +BATCH_SIZE = 50 + + +def fetch_all_historical(force: bool = False) -> pd.DataFrame: + os.makedirs(DATA_DIR, exist_ok=True) + intervals = uh.load_sp500_history() + tickers = uh.all_tickers_ever(intervals) + ["SPY"] + tickers = sorted(set(tickers)) + + existing = None + if os.path.exists(OUT_PATH) and not force: + existing = pd.read_csv(OUT_PATH, index_col=0, parse_dates=True) + missing = [t for t in tickers if t not in existing.columns] + if not missing: + # Just append latest dates + last_date = existing.index[-1] + if (datetime.now() - last_date.to_pydatetime()).days < 2: + print(f"--- us_pit.csv already up to date: {existing.shape} ---") + return existing + tickers = list(existing.columns) + start = (last_date + timedelta(days=1)).strftime("%Y-%m-%d") + print(f"--- 
Appending new dates from {start} for {len(tickers)} tickers ---") + new = _download_batched(tickers, start=start) + if new is not None and not new.empty: + combined = pd.concat([existing, new]).sort_index() + combined = combined[~combined.index.duplicated(keep="last")] + combined.to_csv(OUT_PATH) + print(f"--- Saved {combined.shape} to {OUT_PATH} ---") + return combined + return existing + else: + print(f"--- Have {existing.shape[1]} cols; need {len(missing)} more ---") + tickers = missing + + start = (datetime.now() - timedelta(days=365 * YEARS)).strftime("%Y-%m-%d") + new = _download_batched(tickers, start=start) + + if existing is not None and new is not None and not new.empty: + combined = pd.concat([existing, new.reindex(existing.index)], axis=1) + # Add any new rows from `new` not in existing + new_only_idx = new.index.difference(existing.index) + if len(new_only_idx) > 0: + combined_new = new.loc[new_only_idx].reindex(columns=combined.columns) + combined = pd.concat([combined, combined_new]).sort_index() + else: + combined = new + + combined.to_csv(OUT_PATH) + print(f"--- Saved {combined.shape} to {OUT_PATH} ---") + return combined + + +def _download_batched(tickers: list[str], start: str) -> pd.DataFrame | None: + frames = [] + n = len(tickers) + for i in range(0, n, BATCH_SIZE): + batch = tickers[i:i + BATCH_SIZE] + print(f" [{i}/{n}] fetching {len(batch)} tickers...", flush=True) + try: + raw = yf.download(batch, start=start, auto_adjust=True, + progress=False, threads=True) + if raw.empty: + continue + if isinstance(raw.columns, pd.MultiIndex): + close = raw["Close"] + else: + close = raw[["Close"]].rename(columns={"Close": batch[0]}) + close = close.dropna(axis=1, how="all") + if not close.empty: + frames.append(close) + except Exception as e: + print(f" batch failed: {e}") + if not frames: + return None + result = pd.concat(frames, axis=1).sort_index() + result = result.loc[:, ~result.columns.duplicated()] + return result + + +if __name__ == 
"""
End-to-end optimization study for the US recovery+momentum strategy family,
run on a point-in-time (survivorship-bias-mitigated) S&P 500 universe.

Experiments:
  E1 — Baseline drift: biased vs point-in-time universe, current top10 params.
  E2 — Hyperparameter sweep with 2016-2022 train / 2023-2026 test split.
  E3 — SPY MA200 regime filter (compare base vs filtered).
  E4 — Weighting schemes: equal vs inverse-vol vs rank.
  E5 — Ensemble of top-3 uncorrelated configs.

Usage: uv run python -m research.optimize
"""

import os

import numpy as np
import pandas as pd

import data_manager
import research.pit_backtest as pit
from research.strategies_plus import (EnsembleStrategy, RecoveryMomentumPlus,
                                      spy_ma200_filter)
from strategies.recovery_momentum import RecoveryMomentumStrategy

DATA_DIR = "data"
BIASED_CSV = os.path.join(DATA_DIR, "us.csv")


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def slice_period(df: pd.DataFrame, start: str | None, end: str | None) -> pd.DataFrame:
    """Return the rows of `df` whose index lies in [start, end] (both inclusive)."""
    window = df
    if start:
        window = window.loc[window.index >= start]
    if end:
        window = window.loc[window.index <= end]
    return window


def run_strategy(strategy, prices, benchmark=None, regime_filter=None,
                 fixed_fee: float = 0.0) -> pd.Series:
    """Run the point-in-time backtest with the study-wide cost assumptions."""
    return pit.backtest(
        strategy=strategy,
        prices=prices,
        initial_capital=10_000,
        transaction_cost=0.001,
        fixed_fee=fixed_fee,
        benchmark=benchmark,
        regime_filter=regime_filter,
    )


# ---------------------------------------------------------------------------
# Experiment 1: bias drift
# ---------------------------------------------------------------------------

def exp1_bias_drift(pit_prices_masked: pd.DataFrame) -> pd.DataFrame:
    """E1: the same strategy/params on the biased vs the point-in-time universe."""
    print("\n" + "=" * 90)
    print("E1 — Biased universe vs Point-in-time universe (recovery_mom_top10)")
    print("=" * 90)

    # Biased: current 503 tickers extrapolated backward.
    biased = pd.read_csv(BIASED_CSV, index_col=0, parse_dates=True)

    # Trim both datasets to their overlapping date range for a fair comparison.
    common_start = max(biased.index[0], pit_prices_masked.index[0])
    common_end = min(biased.index[-1], pit_prices_masked.index[-1])
    biased_window = slice_period(biased, str(common_start.date()), str(common_end.date()))
    pit_window = slice_period(pit_prices_masked, str(common_start.date()), str(common_end.date()))

    # SPY is a benchmark column, not a tradable universe member.
    # RecoveryMomentumPlus defaults match recovery_mom_top10, but rank with
    # na_option="keep" so NaN-masked non-members can never be selected.
    rows = []
    for frame, label in ((biased_window, "recovery_mom_top10 (BIASED)"),
                         (pit_window, "recovery_mom_top10 (POINT-IN-TIME)")):
        tradable = [c for c in frame.columns if c != "SPY"]
        equity = run_strategy(RecoveryMomentumPlus(top_n=10), frame[tradable])
        rows.append(pit.summarize(equity, name=label))

    # Benchmark: SPY buy-and-hold in the same window.
    if "SPY" in biased_window.columns:
        spy_bh = (biased_window["SPY"] / biased_window["SPY"].iloc[0]) * 10_000
        rows.append(pit.summarize(spy_bh, name="SPY buy-and-hold"))

    for r in rows:
        print(pit.fmt_row(r))
    return pd.DataFrame(rows)


# ---------------------------------------------------------------------------
# Experiment 2: hyperparameter sweep with train/test split
# ---------------------------------------------------------------------------

def exp2_sweep(pit_masked: pd.DataFrame) -> pd.DataFrame:
    """E2: grid-search hyperparameters on 2016-2022, evaluate out-of-sample on 2023+."""
    print("\n" + "=" * 90)
    print("E2 — Hyperparameter sweep (train: 2016-2022, test: 2023-2026)")
    print("=" * 90)
    tickers = [c for c in pit_masked.columns if c != "SPY"]
    prices = pit_masked[tickers]

    train = slice_period(prices, "2016-04-01", "2022-12-31")
    test = slice_period(prices, "2023-01-01", None)

    grid = [
        dict(top_n=top_n, recovery_window=rec_win, rec_weight=rec_w, rebal_freq=rebal)
        for top_n in (5, 8, 10, 15)
        for rec_win in (42, 63, 126)
        for rec_w in (0.3, 0.5, 0.7)
        for rebal in (10, 21)
    ]

    results = []
    for i, cfg in enumerate(grid):
        sum_tr = pit.summarize(run_strategy(RecoveryMomentumPlus(**cfg), train), name="train")
        sum_te = pit.summarize(run_strategy(RecoveryMomentumPlus(**cfg), test), name="test")
        results.append({
            **cfg,
            "train_CAGR": sum_tr["CAGR"],
            "train_Sharpe": sum_tr["Sharpe"],
            "train_MaxDD": sum_tr["MaxDD"],
            "test_CAGR": sum_te["CAGR"],
            "test_Sharpe": sum_te["Sharpe"],
            "test_MaxDD": sum_te["MaxDD"],
            "test_Calmar": sum_te["Calmar"],
        })
        if (i + 1) % 10 == 0 or i == len(grid) - 1:
            print(f" ... {i+1}/{len(grid)} configs evaluated")

    df = pd.DataFrame(results).sort_values("test_Sharpe", ascending=False)

    # Shared display settings for both leaderboards.
    disp_cols = ["top_n", "recovery_window", "rec_weight", "rebal_freq",
                 "train_Sharpe", "test_Sharpe", "train_CAGR", "test_CAGR",
                 "test_MaxDD", "test_Calmar"]
    fmtrs = {"train_Sharpe": "{:.2f}".format, "test_Sharpe": "{:.2f}".format,
             "train_CAGR": "{:.1%}".format, "test_CAGR": "{:.1%}".format,
             "test_MaxDD": "{:.1%}".format, "test_Calmar": "{:.2f}".format}

    # Top 10 by TEST Sharpe, then top 10 by TRAIN Sharpe to see the overfit gap.
    print("\n --- Top 10 by TEST Sharpe (out-of-sample, 2023-2026) ---")
    print(df.head(10)[disp_cols].to_string(index=False, formatters=fmtrs))

    print("\n --- Top 10 by TRAIN Sharpe (for comparison / overfit check) ---")
    df_tr = df.sort_values("train_Sharpe", ascending=False)
    print(df_tr.head(10)[disp_cols].to_string(index=False, formatters=fmtrs))

    return df
# ---------------------------------------------------------------------------
# Experiment 3: regime filter
# ---------------------------------------------------------------------------

def exp3_regime(pit_masked: pd.DataFrame) -> pd.DataFrame:
    """E3: compare the base top-10 strategy against SPY moving-average regime filters."""
    print("\n" + "=" * 90)
    print("E3 — SPY MA200 regime filter (out-of-sample 2023-2026)")
    print("=" * 90)
    tickers = [c for c in pit_masked.columns if c != "SPY"]

    # Compute the MA filter from FULL history so it is warmed up before 2023.
    spy_full = pit_masked["SPY"].dropna() if "SPY" in pit_masked.columns else None
    test = slice_period(pit_masked, "2023-01-01", None)
    prices = test[tickers]

    def _aligned_filter(window: int):
        # Build the full-history filter, then reindex onto the test window.
        if spy_full is None:
            return None
        full = spy_ma200_filter(spy_full, ma_window=window)
        return full.reindex(test.index).fillna(False).astype(bool)

    variants = [
        (None, "top10 (no filter)"),
        (_aligned_filter(200), "top10 + SPY>MA200 filter"),
        (_aligned_filter(150), "top10 + SPY>MA150 filter"),
    ]
    rows = []
    for filt, label in variants:
        eq = run_strategy(RecoveryMomentumPlus(top_n=10), prices, regime_filter=filt)
        rows.append(pit.summarize(eq, name=label))

    for r in rows:
        print(pit.fmt_row(r))
    return pd.DataFrame(rows)


# ---------------------------------------------------------------------------
# Experiment 4: weighting schemes
# ---------------------------------------------------------------------------

def exp4_weighting(pit_masked: pd.DataFrame) -> pd.DataFrame:
    """E4: equal vs inverse-vol vs rank weighting within the selected top 10."""
    print("\n" + "=" * 90)
    print("E4 — Weighting schemes (out-of-sample 2023-2026, top_n=10)")
    print("=" * 90)
    tickers = [c for c in pit_masked.columns if c != "SPY"]
    test = slice_period(pit_masked[tickers], "2023-01-01", None)

    rows = [
        pit.summarize(run_strategy(RecoveryMomentumPlus(top_n=10, weighting=w), test),
                      name=f"top10 weighting={w}")
        for w in ("equal", "inv_vol", "rank")
    ]
    for r in rows:
        print(pit.fmt_row(r))
    return pd.DataFrame(rows)


# ---------------------------------------------------------------------------
# Experiment 5: ensemble
# ---------------------------------------------------------------------------

def exp5_ensemble(pit_masked: pd.DataFrame, sweep_df: pd.DataFrame) -> pd.DataFrame:
    """E5: blend up to three weakly-correlated configs from the sweep leaderboard."""
    print("\n" + "=" * 90)
    print("E5 — Ensemble of 3 uncorrelated top configs (out-of-sample 2023-2026)")
    print("=" * 90)
    tickers = [c for c in pit_masked.columns if c != "SPY"]
    test = slice_period(pit_masked[tickers], "2023-01-01", None)

    # Greedy pick from the top-20 test-Sharpe configs: keep a candidate only
    # if its equity curve correlates < 0.9 with every already-kept curve.
    top20 = sweep_df.sort_values("test_Sharpe", ascending=False).head(20)
    curves = []
    components = []
    for _, row in top20.iterrows():
        cfg = dict(top_n=int(row["top_n"]),
                   recovery_window=int(row["recovery_window"]),
                   rec_weight=float(row["rec_weight"]),
                   rebal_freq=int(row["rebal_freq"]))
        eq = run_strategy(RecoveryMomentumPlus(**cfg), test)
        if any(eq.pct_change().corr(kept.pct_change()) > 0.9 for kept in curves):
            continue
        curves.append(eq)
        components.append((RecoveryMomentumPlus(**cfg), 1.0))
        if len(components) >= 3:
            break

    print(f" Selected {len(components)} uncorrelated configs for ensemble:")
    for strat, _ in components:
        print(f" top_n={strat.top_n}, rec_win={strat.recovery_window}, "
              f"rec_w={strat.rec_weight}, rebal={strat.rebal_freq}")

    eq_ens = run_strategy(EnsembleStrategy(components), test)

    rows = [pit.summarize(curve, name=f" component {i+1}")
            for i, curve in enumerate(curves)]
    rows.append(pit.summarize(eq_ens, name="ENSEMBLE (equal-weight)"))

    # Also: ensemble + regime filter (MA computed from full history).
    if "SPY" in pit_masked.columns:
        spy_full = pit_masked["SPY"].dropna()
        filt = spy_ma200_filter(spy_full).reindex(test.index).fillna(False).astype(bool)
        eq_ens_reg = run_strategy(EnsembleStrategy(components), test, regime_filter=filt)
        rows.append(pit.summarize(eq_ens_reg, name="ENSEMBLE + SPY>MA200 filter"))

    for r in rows:
        print(pit.fmt_row(r))
    return pd.DataFrame(rows)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Run all five experiments on the point-in-time universe; persist the sweep."""
    print("Loading point-in-time price data...")
    raw = pit.load_pit_prices()
    print(f" Raw (union) shape: {raw.shape}, {raw.index[0].date()} → {raw.index[-1].date()}")

    masked = pit.pit_universe(raw)
    # Sanity: how many ticker-days survive the membership mask?
    total = masked.size
    valid = masked.notna().sum().sum()
    print(f" Point-in-time valid ticker-days: {valid:,} / {total:,} ({valid/total*100:.1f}%)")
    daily_universe = masked.notna().sum(axis=1)
    print(f" Universe size per day: min={daily_universe.min()}, median={int(daily_universe.median())}, max={daily_universe.max()}")

    exp1_bias_drift(masked)
    sweep = exp2_sweep(masked)
    exp3_regime(masked)
    exp4_weighting(masked)
    exp5_ensemble(masked, sweep)

    # Save sweep for inspection
    out = os.path.join(DATA_DIR, "research_sweep.csv")
    sweep.to_csv(out, index=False)
    print(f"\n Full sweep saved to {out}")


if __name__ == "__main__":
    main()
+""" + +import os + +import numpy as np +import pandas as pd + +import metrics +import universe_history as uh + +DATA_DIR = "data" +PIT_CSV = os.path.join(DATA_DIR, "us_pit.csv") + + +# --------------------------------------------------------------------------- +# Data loading +# --------------------------------------------------------------------------- + +def load_pit_prices() -> pd.DataFrame: + """Load the full historical S&P 500 price matrix (delisted included).""" + if not os.path.exists(PIT_CSV): + raise FileNotFoundError( + f"{PIT_CSV} not found. Run `uv run python -m research.fetch_historical` first." + ) + df = pd.read_csv(PIT_CSV, index_col=0, parse_dates=True) + return df.sort_index() + + +def pit_universe(prices: pd.DataFrame) -> pd.DataFrame: + """Return prices masked to S&P 500 membership at each date (NaN outside).""" + intervals = uh.load_sp500_history() + return uh.mask_prices(prices, intervals) + + +# --------------------------------------------------------------------------- +# Backtest engine (mirrors main.backtest but accepts masked prices) +# --------------------------------------------------------------------------- + +def backtest( + strategy, + prices: pd.DataFrame, + initial_capital: float = 10_000, + transaction_cost: float = 0.001, + fixed_fee: float = 0.0, + benchmark: pd.Series | None = None, + regime_filter: pd.Series | None = None, +) -> pd.Series: + """ + Vectorized backtest with optional regime filter. + + `regime_filter`: boolean series aligned to prices.index. True → be in the + market (use strategy weights). False → go to cash. When None, always invested. 
+ """ + weights = strategy.generate_signals(prices) + weights = weights.reindex(prices.index).fillna(0.0) + + if regime_filter is not None: + rf = regime_filter.reindex(prices.index).fillna(False).astype(float) + weights = weights.mul(rf, axis=0) + + daily_returns = prices.pct_change().fillna(0.0) + portfolio_returns = (daily_returns * weights).sum(axis=1) + + turnover = weights.diff().abs().sum(axis=1).fillna(0.0) + portfolio_returns -= turnover * transaction_cost + + if fixed_fee > 0: + weight_changes = weights.diff().fillna(0.0) + n_trades = (weight_changes.abs() > 1e-8).sum(axis=1) + equity_running = (1 + portfolio_returns).cumprod() * initial_capital + fee_impact = (n_trades * fixed_fee) / equity_running.shift(1).fillna(initial_capital) + portfolio_returns -= fee_impact + + equity = (1 + portfolio_returns).cumprod() * initial_capital + return equity + + +# --------------------------------------------------------------------------- +# Metrics helper +# --------------------------------------------------------------------------- + +def summarize(equity: pd.Series, name: str = "") -> dict: + """Return a dict of key performance metrics (no printing).""" + eq = equity.dropna() + if len(eq) < 2: + return {"name": name, "error": "insufficient data"} + daily = eq.pct_change().dropna() + total_return = eq.iloc[-1] / eq.iloc[0] - 1 + years = (eq.index[-1] - eq.index[0]).days / 365.25 + cagr = (eq.iloc[-1] / eq.iloc[0]) ** (1 / years) - 1 if years > 0 else 0.0 + vol = daily.std() * np.sqrt(252) + sharpe = (daily.mean() * 252) / vol if vol > 0 else 0.0 + downside = daily[daily < 0].std() * np.sqrt(252) + sortino = (daily.mean() * 252) / downside if downside > 0 else 0.0 + dd = (eq / eq.cummax() - 1).min() + calmar = cagr / abs(dd) if dd < 0 else 0.0 + return { + "name": name, + "CAGR": cagr, + "Sharpe": sharpe, + "Sortino": sortino, + "MaxDD": dd, + "Calmar": calmar, + "TotalRet": total_return, + "Vol": vol, + } + + +def fmt_row(r: dict) -> str: + return (f" 
"""
Optimization variants of RecoveryMomentumStrategy.

Four dimensions explored:
  1. Hyperparameters (top_n, recovery_window, mom_lookback, rebal_freq, weights)
  2. Regime filter: zero-out weights when SPY < MA200
  3. Weighting scheme: equal / inverse-vol / rank-weighted
  4. Ensemble: weighted blend of multiple strategies

All strategies follow the same Strategy protocol (generate_signals → weights DF).
"""

import numpy as np
import pandas as pd

from strategies.base import Strategy


# ---------------------------------------------------------------------------
# Generalized Recovery+Momentum strategy
# ---------------------------------------------------------------------------

class RecoveryMomentumPlus(Strategy):
    """
    Recovery + momentum composite with configurable blend, weighting, and
    regime filter hooks.

    Parameters
    ----------
    recovery_window : int
        Lookback for the recovery factor (price / rolling min - 1).
    mom_lookback : int
        Long-horizon momentum window total length.
    mom_skip : int
        Short-term reversal skip for momentum.
    rebal_freq : int
        Trading-day rebalance interval.
    top_n : int
        Number of stocks selected each rebalance.
    rec_weight : float in [0, 1]
        Weight of recovery factor in composite rank blend (mom_weight = 1 - rec_weight).
    weighting : {"equal", "inv_vol", "rank"}
        Portfolio weighting scheme for the selected top_n.
    vol_window : int
        Volatility lookback when weighting="inv_vol".
    """

    def __init__(self,
                 recovery_window: int = 63,
                 mom_lookback: int = 252,
                 mom_skip: int = 21,
                 rebal_freq: int = 21,
                 top_n: int = 10,
                 rec_weight: float = 0.5,
                 weighting: str = "equal",
                 vol_window: int = 60):
        if weighting not in ("equal", "inv_vol", "rank"):
            raise ValueError(f"weighting must be equal|inv_vol|rank, got {weighting!r}")
        self.recovery_window = recovery_window
        self.mom_lookback = mom_lookback
        self.mom_skip = mom_skip
        self.rebal_freq = rebal_freq
        self.top_n = top_n
        self.rec_weight = rec_weight
        self.weighting = weighting
        self.vol_window = vol_window

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a date × ticker weight matrix; each row sums to at most 1.

        The returned weights are already lagged one day (trade on yesterday's
        signal), so the backtester applies them to same-day returns.
        """
        # Factors
        recovery = data / data.rolling(self.recovery_window).min() - 1
        momentum = data.shift(self.mom_skip).pct_change(self.mom_lookback - self.mom_skip)

        # Cross-sectional percentile ranks. na_option="keep" means NaN-masked
        # (non-member) tickers keep NaN ranks and can never enter the top-N.
        rec_rank = recovery.rank(axis=1, pct=True, na_option="keep")
        mom_rank = momentum.rank(axis=1, pct=True, na_option="keep")
        composite = self.rec_weight * rec_rank + (1 - self.rec_weight) * mom_rank

        # Top-N selection — only on days with at least top_n valid scores.
        rank = composite.rank(axis=1, ascending=False, na_option="bottom")
        n_valid = composite.notna().sum(axis=1)
        enough = n_valid >= self.top_n
        top_mask = (rank <= self.top_n) & enough.values.reshape(-1, 1)

        # Weighting within top-N
        if self.weighting == "equal":
            raw = top_mask.astype(float)
        elif self.weighting == "rank":
            # Higher composite → higher weight within top-N
            ranked_score = composite.where(top_mask, 0.0)
            raw = ranked_score
        elif self.weighting == "inv_vol":
            # Use inverse realized-volatility as weights within top-N
            rets = data.pct_change()
            vol = rets.rolling(self.vol_window).std()
            inv_vol = 1.0 / vol.replace(0, np.nan)
            raw = inv_vol.where(top_mask, 0.0).fillna(0.0)

        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

        # Rebalance: keep only rebalance-day rows, hold (ffill) in between,
        # stay flat during the factor warm-up period.
        warmup = max(self.mom_lookback, self.recovery_window, self.vol_window)
        rebal_mask = pd.Series(False, index=data.index)
        rebal_indices = list(range(warmup, len(data), self.rebal_freq))
        rebal_mask.iloc[rebal_indices] = True
        signals[~rebal_mask] = np.nan
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0

        return signals.shift(1).fillna(0.0)


# ---------------------------------------------------------------------------
# Ensemble
# ---------------------------------------------------------------------------

class EnsembleStrategy(Strategy):
    """
    Weighted blend of several sub-strategies. Each sub-strategy produces a
    weight matrix; we linearly combine them. The result still sums to (at
    most) 1 per row since each sub-strategy does.
    """

    def __init__(self, components: list[tuple[Strategy, float]]):
        """Normalize component weights to sum to 1.

        Raises
        ------
        ValueError
            If `components` is empty or the weights do not sum to a positive
            value (previously surfaced as a bare ZeroDivisionError).
        """
        if not components:
            raise ValueError("EnsembleStrategy requires at least one component")
        total = sum(w for _, w in components)
        if total <= 0:
            raise ValueError("EnsembleStrategy component weights must sum to a positive value")
        self.components = [(s, w / total) for s, w in components]

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return the weight-blended sum of all component signal matrices."""
        out = None
        for strat, w in self.components:
            sig = strat.generate_signals(data).mul(w)
            if out is None:
                out = sig
            else:
                # Align columns (should be identical since same data passed)
                out = out.add(sig, fill_value=0.0)
        return out


# ---------------------------------------------------------------------------
# Regime filter helper
# ---------------------------------------------------------------------------

def spy_ma200_filter(spy: pd.Series, ma_window: int = 200) -> pd.Series:
    """
    Boolean Series: True when SPY close > SPY MA(ma_window), shifted by 1 to
    avoid lookahead. Use as `regime_filter=...` in pit_backtest.backtest().
    """
    ma = spy.rolling(ma_window, min_periods=ma_window).mean()
    signal = (spy > ma).fillna(False)
    return signal.shift(1).fillna(False)
+ +Approach: Wikipedia's "Selected changes to the list of S&P 500 components" +table lists every add/remove event (394 rows back to 1976, as of 2026). We +start from today's membership and walk the change log *backward*: + - An 'Added' ticker on date D was NOT a member before D. + - A 'Removed' ticker on date D WAS a member before D. +Applied iteratively, this yields the set of members on any historical date. + +The membership info is cached in data/sp500_history.json so Wikipedia is hit +at most once per day. The cache stores per-ticker membership intervals: + { "ticker": [[start, end_or_null], ...] } +where dates are YYYY-MM-DD strings. +""" + +import io +import json +import os +import urllib.request +from datetime import date, datetime + +import pandas as pd + +CACHE_DIR = "data" +_HEADERS = {"User-Agent": "Mozilla/5.0 (quant-backtest)"} + + +# --------------------------------------------------------------------------- +# Fetch + parse Wikipedia +# --------------------------------------------------------------------------- + +def _fetch_sp500_tables() -> tuple[pd.DataFrame, pd.DataFrame]: + """Return (current_list, changes_log) from the S&P 500 Wikipedia page.""" + url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies" + req = urllib.request.Request(url, headers=_HEADERS) + with urllib.request.urlopen(req) as resp: + html = resp.read().decode("utf-8") + tables = pd.read_html(io.StringIO(html)) + current = tables[0] + changes = tables[1] + changes.columns = [ + "_".join(c).strip() if isinstance(c, tuple) else c + for c in changes.columns + ] + changes.columns = [ + c.replace("Effective Date_Effective Date", "Date") for c in changes.columns + ] + return current, changes + + +def _normalize_ticker(t: str) -> str: + """Yahoo Finance ticker format: BRK.B → BRK-B.""" + return str(t).replace(".", "-").strip() + + +# --------------------------------------------------------------------------- +# Membership reconstruction +# 
# ---------------------------------------------------------------------------

def build_sp500_history() -> dict[str, list[list[str | None]]]:
    """
    Reconstruct per-ticker membership intervals.

    Returns
    -------
    dict: ticker -> list of [start_date, end_date_or_None] pairs.
          end_date=None means the ticker is still a member as of today.
          Dates are YYYY-MM-DD strings.

    Algorithm: start from today's set of members, walk the change log from
    newest to oldest. For each event on date D:
      - 'Added' ticker: its currently-open interval started on D — finalize
        it (the ticker was NOT a member before D).
      - 'Removed' ticker: it WAS a member up to D — open a new, older
        interval ending on D; its start is set by an even earlier 'Added'
        event, or left as None if the log never reaches one.
    """
    current, changes = _fetch_sp500_tables()

    current_tickers = {_normalize_ticker(s) for s in current["Symbol"].tolist()}

    # Parse change log, newest first.
    changes["dt"] = pd.to_datetime(changes["Date"], errors="coerce")
    changes = changes.dropna(subset=["dt"]).sort_values("dt", ascending=False)

    # intervals[ticker]: completed [start, end] pairs (finalized so far).
    # open_end[ticker]: end date of the interval currently open during the
    # backward walk; None means "still a member today".
    intervals: dict[str, list[list[str | None]]] = {}
    open_end: dict[str, str | None] = {}

    # Initialize: today's members have an open interval ending = None (still in).
    for t in current_tickers:
        open_end[t] = None
        intervals[t] = []

    for _, row in changes.iterrows():
        d = row["dt"].strftime("%Y-%m-%d")
        added = row.get("Added_Ticker")
        removed = row.get("Removed_Ticker")

        if pd.notna(added):
            a = _normalize_ticker(added)
            # Added on d → the open interval starts on d; finalize it.
            # No further open interval exists backward in time for this
            # ticker unless a 'Removed' event below opens an older one.
            if a in open_end:
                intervals[a].append([d, open_end[a]])
                del open_end[a]

        if pd.notna(removed):
            r = _normalize_ticker(removed)
            # Removed on d → it WAS a member before d: open a new interval
            # ending on d (start unknown until an earlier 'Added' event).
            if r not in open_end:
                intervals.setdefault(r, [])
            open_end[r] = d

    # Any ticker still with an open interval predates the change log; mark
    # the unknown start as None (conservative "unknown earlier" marker).
    for t, end in open_end.items():
        intervals.setdefault(t, []).append([None, end])

    # Sort intervals per ticker oldest → newest (a None start sorts first).
    for ivs in intervals.values():
        ivs.sort(key=lambda iv: (iv[0] or "0000-00-00"))

    return intervals


# ---------------------------------------------------------------------------
# Cache I/O
# ---------------------------------------------------------------------------

def _cache_path() -> str:
    """Location of the JSON membership cache."""
    return os.path.join(CACHE_DIR, "sp500_history.json")


def load_sp500_history(force_refresh: bool = False) -> dict[str, list[list[str | None]]]:
    """Load cached membership history, or rebuild if stale (>1 day old).

    Any cache written on an earlier day is considered stale and triggers a
    refetch, so Wikipedia is hit at most once per day.
    """
    path = _cache_path()
    if not force_refresh and os.path.exists(path):
        try:
            with open(path) as f:
                data = json.load(f)
            if data.get("date") == str(date.today()):
                return data["intervals"]
        except (OSError, ValueError, KeyError):
            # Unreadable or malformed cache → fall through and rebuild.
            # (json.JSONDecodeError is a ValueError subclass.)
            pass
    print("--- Rebuilding S&P 500 membership history from Wikipedia ---")
    intervals = build_sp500_history()
    os.makedirs(CACHE_DIR, exist_ok=True)
    with open(path, "w") as f:
        json.dump({"date": str(date.today()), "intervals": intervals}, f)
    print(f"--- Cached {len(intervals)} tickers' membership intervals ---")
    return intervals
+ """ + if intervals is None: + intervals = load_sp500_history() + cols = tickers if tickers is not None else sorted(intervals.keys()) + # Tickers not in `intervals` (e.g. SPY, benchmarks, ETFs) are treated as + # always-members so callers can pass the full price matrix through + # mask_prices without zeroing out benchmark series. + mask = pd.DataFrame(False, index=dates, columns=cols) + for t in cols: + if t not in intervals: + mask[t] = True + continue + for start, end in intervals[t]: + s = pd.Timestamp(start) if start else dates[0] + e = pd.Timestamp(end) if end else dates[-1] + pd.Timedelta(days=1) + # Interval semantics: member on [start, end). A ticker removed on + # date D was no longer a member on D. + mask.loc[(mask.index >= s) & (mask.index < e), t] = True + return mask + + +def all_tickers_ever(intervals: dict | None = None) -> list[str]: + """All tickers that were ever S&P 500 members (for price data fetching).""" + if intervals is None: + intervals = load_sp500_history() + return sorted(intervals.keys()) + + +def mask_prices(prices: pd.DataFrame, + intervals: dict | None = None) -> pd.DataFrame: + """ + Return a copy of `prices` with NaN set for (date, ticker) pairs where + the ticker was not an S&P 500 member on that date. + + This is the key survivorship-bias fix: strategies compute signals from + the masked price data, so they naturally cannot select stocks outside + the point-in-time index membership. + + Warm-up note: a newly-added member needs sufficient non-NaN history for + its rolling windows to produce a valid signal. For this codebase's + ~252-day lookbacks, a stock becomes "selectable" roughly 1 year after + joining. This is conservative but correct: before that, we have no + legitimate signal anyway. + """ + mask = membership_mask(prices.index, intervals, tickers=list(prices.columns)) + return prices.where(mask)