"""Unit tests for the external Ken French FF5 daily factor loader.

These tests isolate the loader from the network by patching
``factor_attribution.urlopen`` / ``factor_attribution._download_kf_zip_bytes``
and by synthesizing in-memory zip payloads that mimic the official
Ken French daily five-factor CSV layout.
"""

import io
import socket
import ssl
import tempfile
import unittest
import zipfile
from pathlib import Path
from urllib.error import URLError
from unittest import mock

import pandas as pd

from factor_attribution import (
    ExternalFactorDownloadError,
    ExternalFactorFormatError,
    KEN_FRENCH_DAILY_FF5_ZIP_URL,
    _download_kf_zip_bytes,
    _parse_kf_daily_csv,
    load_external_us_factors,
)


class ExternalFactorLoaderTests(unittest.TestCase):
    """Behavioral tests covering download, parsing, caching, and error paths."""

    def test_download_kf_zip_bytes_fetches_official_ken_french_zip(self) -> None:
        """The downloader requests the official FF5 zip URL with a 30s timeout."""
        response = mock.MagicMock()
        response.read.return_value = b"zip-bytes"
        # The mocked response must support the context-manager protocol,
        # since the downloader presumably uses ``with urlopen(...)``.
        response.__enter__.return_value = response
        response.__exit__.return_value = False
        with mock.patch("factor_attribution.urlopen", return_value=response) as mocked_urlopen:
            raw_bytes = _download_kf_zip_bytes()
        self.assertEqual(raw_bytes, b"zip-bytes")
        # The first positional argument is expected to be a urllib Request
        # object pointing at the official Ken French data library URL.
        request = mocked_urlopen.call_args.args[0]
        self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
        self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)

    def test_download_kf_zip_bytes_wraps_transport_errors(self) -> None:
        """Every plausible transport failure is normalized to ExternalFactorDownloadError."""
        # NOTE: on Python 3.10+ ``socket.timeout`` is an alias of
        # ``TimeoutError``; keeping both is harmless under subTest.
        for error in (
            URLError("boom"),
            TimeoutError("timed out"),
            ConnectionError("conn reset"),
            socket.timeout("socket timed out"),
            ssl.SSLError("tls failed"),
        ):
            with self.subTest(error_type=type(error).__name__):
                with mock.patch("factor_attribution.urlopen", side_effect=error):
                    with self.assertRaises(ExternalFactorDownloadError):
                        _download_kf_zip_bytes()

    def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self) -> None:
        """Percent values are converted to decimals and YYYYMMDD dates parsed."""
        # Mimics the Ken French CSV layout: a preamble line, a header row with
        # a blank first column name, YYYYMMDD-indexed percent rows, trailing blank.
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "20260105,-0.20,0.10,0.30,-0.15,0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                factors = load_external_us_factors(cache_dir=Path(tmpdir))
        self.assertListEqual(
            list(factors.columns),
            ["MKT_RF", "SMB", "HML", "RMW", "CMA", "RF"],
        )
        # 1.00% in the source file should come back as the decimal 0.01.
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
        self.assertAlmostEqual(factors.iloc[0]["RF"], 0.0002)
        self.assertEqual(str(factors.index[0].date()), "2026-01-02")

    def test_load_external_us_factors_falls_back_to_cache_when_download_fails(self) -> None:
        """A failed download warns about cached data and serves the cache file."""
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            # Pre-seed the on-disk cache the loader is expected to read from.
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                side_effect=ExternalFactorDownloadError("boom"),
            ):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)
        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_parse_kf_daily_csv_raises_external_factor_format_error_for_missing_header(self) -> None:
        """A zip whose CSV lacks the expected header row raises a format error."""
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            "not the expected file format\n20260102,1.00\n",
        )
        with self.assertRaises(ExternalFactorFormatError):
            _parse_kf_daily_csv(zip_bytes)

    def test_load_external_us_factors_warns_and_falls_back_to_cache_when_source_format_is_invalid(self) -> None:
        """A well-formed zip with malformed CSV contents falls back to cache."""
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            # Valid zip container, but the CSV inside has no recognizable header.
            malformed_zip_bytes = self._make_zip_bytes(
                "F-F_Research_Data_5_Factors_2x3_daily.csv",
                "not the expected file format\n20260102,1.00\n",
            )
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=malformed_zip_bytes,
            ):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)
        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_warns_and_falls_back_to_cache_when_zip_is_invalid(self) -> None:
        """Bytes that are not a zip archive at all also trigger the cache fallback."""
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=b"not-a-zip-file",
            ):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)
        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_surfaces_cache_write_failures(self) -> None:
        """Failures while writing the cache propagate rather than being swallowed."""
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                # Simulate a disk error when the loader persists the cache.
                with mock.patch("pandas.DataFrame.to_csv", side_effect=OSError("disk full")):
                    with self.assertRaises(OSError):
                        load_external_us_factors(cache_dir=Path(tmpdir))

    def test_load_external_us_factors_does_not_swallow_unrelated_local_failures(self) -> None:
        """Non-download, non-format bugs (e.g. RuntimeError) must not hit the cache path."""
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            # Even with a usable cache present, an unexpected local error
            # must propagate instead of silently serving cached data.
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                with mock.patch(
                    "factor_attribution._parse_kf_daily_csv",
                    side_effect=RuntimeError("unexpected local bug"),
                ):
                    with self.assertRaises(RuntimeError):
                        load_external_us_factors(cache_dir=cache_dir)

    def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
        """Build an in-memory zip archive holding a single named text file."""
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, mode="w") as archive:
            archive.writestr(filename, contents)
        return buffer.getvalue()