Use explicit download errors for factor loader fallback

This commit is contained in:
2026-04-07 16:01:51 +08:00
parent c46727b1ca
commit 7f0c5de574
2 changed files with 29 additions and 30 deletions

View File

@@ -1,6 +1,8 @@
from __future__ import annotations from __future__ import annotations
import io import io
import socket
import ssl
import warnings import warnings
import zipfile import zipfile
from pathlib import Path from pathlib import Path
@@ -21,13 +23,20 @@ class ExternalFactorFormatError(ValueError):
pass pass
class ExternalFactorDownloadError(OSError):
pass
def _download_kf_zip_bytes() -> bytes:
    """Fetch the Ken French daily FF5 zip archive and return its raw bytes.

    Sends a single GET to ``KEN_FRENCH_DAILY_FF5_ZIP_URL`` with a project
    User-Agent and a 30-second timeout.

    Returns:
        The raw bytes of the downloaded zip payload.

    Raises:
        ExternalFactorDownloadError: if the request fails for any
            transport-level reason (URL/DNS error, timeout, connection
            reset, TLS failure); the original exception is chained.
    """
    request = Request(
        KEN_FRENCH_DAILY_FF5_ZIP_URL,
        headers={"User-Agent": "quant-factor-attribution/0.1"},
    )
    # Explicit transport-error tuple: socket.timeout and ssl.SSLError are
    # listed even though they overlap TimeoutError/OSError on newer Pythons,
    # so the intent is visible and older interpreters are covered too.
    transport_errors = (
        URLError,
        TimeoutError,
        ConnectionError,
        socket.timeout,
        ssl.SSLError,
    )
    try:
        with urlopen(request, timeout=30) as response:
            payload = response.read()
    except transport_errors as exc:
        raise ExternalFactorDownloadError(f"Failed to download external factor data: {exc}") from exc
    return payload
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame: def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
@@ -99,7 +108,7 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
try: try:
raw_bytes = _download_kf_zip_bytes() raw_bytes = _download_kf_zip_bytes()
except (URLError, TimeoutError, ConnectionError, OSError) as exc: except ExternalFactorDownloadError as exc:
if cache_path.exists(): if cache_path.exists():
return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}") return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}")
raise raise

View File

@@ -1,4 +1,6 @@
import io import io
import socket
import ssl
import tempfile import tempfile
import unittest import unittest
import zipfile import zipfile
@@ -9,6 +11,7 @@ from unittest import mock
import pandas as pd import pandas as pd
from factor_attribution import ( from factor_attribution import (
ExternalFactorDownloadError,
ExternalFactorFormatError, ExternalFactorFormatError,
KEN_FRENCH_DAILY_FF5_ZIP_URL, KEN_FRENCH_DAILY_FF5_ZIP_URL,
_download_kf_zip_bytes, _download_kf_zip_bytes,
@@ -32,6 +35,19 @@ class ExternalFactorLoaderTests(unittest.TestCase):
self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL) self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30) self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)
def test_download_kf_zip_bytes_wraps_transport_errors(self):
    """Every transport-level failure mode is wrapped in ExternalFactorDownloadError."""
    transport_failures = [
        URLError("boom"),
        TimeoutError("timed out"),
        ConnectionError("conn reset"),
        socket.timeout("socket timed out"),
        ssl.SSLError("tls failed"),
    ]
    for failure in transport_failures:
        # subTest keeps the remaining error types running if one case fails.
        with self.subTest(error_type=type(failure).__name__):
            with mock.patch("factor_attribution.urlopen", side_effect=failure):
                self.assertRaises(ExternalFactorDownloadError, _download_kf_zip_bytes)
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self): def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
csv_text = ( csv_text = (
"This line is ignored\n" "This line is ignored\n"
@@ -78,33 +94,7 @@ class ExternalFactorLoaderTests(unittest.TestCase):
cached.to_csv(cache_dir / "ff5_us_daily.csv") cached.to_csv(cache_dir / "ff5_us_daily.csv")
with mock.patch( with mock.patch(
"factor_attribution._download_kf_zip_bytes", "factor_attribution._download_kf_zip_bytes",
side_effect=URLError("boom"), side_effect=ExternalFactorDownloadError("boom"),
):
with self.assertWarnsRegex(UserWarning, "cached data"):
factors = load_external_us_factors(cache_dir=cache_dir)
self.assertEqual(len(factors), 1)
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
def test_load_external_us_factors_falls_back_to_cache_when_download_raises_oserror(self):
cached = pd.DataFrame(
{
"MKT_RF": [0.01],
"SMB": [0.0],
"HML": [0.0],
"RMW": [0.0],
"CMA": [0.0],
"RF": [0.0001],
},
index=pd.to_datetime(["2026-01-02"]),
)
with tempfile.TemporaryDirectory() as tmpdir:
cache_dir = Path(tmpdir)
cached.to_csv(cache_dir / "ff5_us_daily.csv")
with mock.patch(
"factor_attribution._download_kf_zip_bytes",
side_effect=OSError("transport reset"),
): ):
with self.assertWarnsRegex(UserWarning, "cached data"): with self.assertWarnsRegex(UserWarning, "cached data"):
factors = load_external_us_factors(cache_dir=cache_dir) factors = load_external_us_factors(cache_dir=cache_dir)