diff --git a/factor_attribution.py b/factor_attribution.py index 27d178c..7aa6c92 100644 --- a/factor_attribution.py +++ b/factor_attribution.py @@ -1,6 +1,8 @@ from __future__ import annotations import io +import socket +import ssl import warnings import zipfile from pathlib import Path @@ -21,13 +23,20 @@ class ExternalFactorFormatError(ValueError): pass +class ExternalFactorDownloadError(OSError): + pass + + def _download_kf_zip_bytes() -> bytes: request = Request( KEN_FRENCH_DAILY_FF5_ZIP_URL, headers={"User-Agent": "quant-factor-attribution/0.1"}, ) - with urlopen(request, timeout=30) as response: - return response.read() + try: + with urlopen(request, timeout=30) as response: + return response.read() + except (URLError, TimeoutError, ConnectionError, socket.timeout, ssl.SSLError) as exc: + raise ExternalFactorDownloadError(f"Failed to download external factor data: {exc}") from exc def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame: @@ -99,7 +108,7 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF try: raw_bytes = _download_kf_zip_bytes() - except (URLError, TimeoutError, ConnectionError, OSError) as exc: + except ExternalFactorDownloadError as exc: if cache_path.exists(): return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}") raise diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py index 73a8cab..4c3a80b 100644 --- a/tests/test_factor_attribution.py +++ b/tests/test_factor_attribution.py @@ -1,4 +1,6 @@ import io +import socket +import ssl import tempfile import unittest import zipfile @@ -9,6 +11,7 @@ from unittest import mock import pandas as pd from factor_attribution import ( + ExternalFactorDownloadError, ExternalFactorFormatError, KEN_FRENCH_DAILY_FF5_ZIP_URL, _download_kf_zip_bytes, @@ -32,6 +35,19 @@ class ExternalFactorLoaderTests(unittest.TestCase): self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL) self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30) + def test_download_kf_zip_bytes_wraps_transport_errors(self): + for error in ( + URLError("boom"), + TimeoutError("timed out"), + ConnectionError("conn reset"), + socket.timeout("socket timed out"), + ssl.SSLError("tls failed"), + ): + with self.subTest(error_type=type(error).__name__): + with mock.patch("factor_attribution.urlopen", side_effect=error): + with self.assertRaises(ExternalFactorDownloadError): + _download_kf_zip_bytes() + def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self): csv_text = ( "This line is ignored\n" @@ -78,33 +94,7 @@ class ExternalFactorLoaderTests(unittest.TestCase): cached.to_csv(cache_dir / "ff5_us_daily.csv") with mock.patch( "factor_attribution._download_kf_zip_bytes", - side_effect=URLError("boom"), - ): - with self.assertWarnsRegex(UserWarning, "cached data"): - factors = load_external_us_factors(cache_dir=cache_dir) - - self.assertEqual(len(factors), 1) - self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01) - - def test_load_external_us_factors_falls_back_to_cache_when_download_raises_oserror(self): - cached = pd.DataFrame( - { - "MKT_RF": [0.01], - "SMB": [0.0], - "HML": [0.0], - "RMW": [0.0], - "CMA": [0.0], - "RF": [0.0001], - }, - index=pd.to_datetime(["2026-01-02"]), - ) - - with tempfile.TemporaryDirectory() as tmpdir: - cache_dir = Path(tmpdir) - cached.to_csv(cache_dir / "ff5_us_daily.csv") - with mock.patch( - "factor_attribution._download_kf_zip_bytes", - side_effect=OSError("transport reset"), + side_effect=ExternalFactorDownloadError("boom"), ): with self.assertWarnsRegex(UserWarning, "cached data"): factors = load_external_us_factors(cache_dir=cache_dir)