Use explicit download errors for factor loader fallback
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import socket
|
||||
import ssl
|
||||
import warnings
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
@@ -21,13 +23,20 @@ class ExternalFactorFormatError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class ExternalFactorDownloadError(OSError):
    """Raised when the external factor data cannot be downloaded.

    Subclasses ``OSError``, so handlers that catch ``OSError`` still see
    this error, while callers that only want to react to a failed download
    can catch this type specifically.
    """
|
||||
|
||||
|
||||
def _download_kf_zip_bytes() -> bytes:
    """Fetch the archive at ``KEN_FRENCH_DAILY_FF5_ZIP_URL`` and return its raw bytes.

    The request is sent with an explicit ``User-Agent`` header and a
    30-second timeout.

    Returns:
        The complete response body, unparsed.

    Raises:
        ExternalFactorDownloadError: If opening the URL or reading the
            response fails. The original exception is chained as
            ``__cause__``.
    """
    # Function-scope import keeps this block self-contained.
    from http.client import HTTPException

    request = Request(
        KEN_FRENCH_DAILY_FF5_ZIP_URL,
        headers={"User-Agent": "quant-factor-attribution/0.1"},
    )
    try:
        with urlopen(request, timeout=30) as response:
            return response.read()
    except (OSError, HTTPException) as exc:
        # URLError, TimeoutError, ConnectionError, socket.timeout and
        # ssl.SSLError are all OSError subclasses, so catching OSError covers
        # them plus any other socket-level failure raised mid-read.
        # HTTPException (e.g. IncompleteRead on a truncated body) is not an
        # OSError and must be listed separately. Only network calls live in
        # the try body, so every error caught here is a download failure.
        raise ExternalFactorDownloadError(
            f"Failed to download external factor data: {exc}"
        ) from exc
|
||||
|
||||
|
||||
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
||||
@@ -99,7 +108,7 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
|
||||
|
||||
try:
|
||||
raw_bytes = _download_kf_zip_bytes()
|
||||
except (URLError, TimeoutError, ConnectionError, OSError) as exc:
|
||||
except ExternalFactorDownloadError as exc:
|
||||
if cache_path.exists():
|
||||
return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}")
|
||||
raise
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import io
|
||||
import socket
|
||||
import ssl
|
||||
import tempfile
|
||||
import unittest
|
||||
import zipfile
|
||||
@@ -9,6 +11,7 @@ from unittest import mock
|
||||
import pandas as pd
|
||||
|
||||
from factor_attribution import (
|
||||
ExternalFactorDownloadError,
|
||||
ExternalFactorFormatError,
|
||||
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||
_download_kf_zip_bytes,
|
||||
@@ -32,6 +35,19 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
||||
self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
|
||||
self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)
|
||||
|
||||
def test_download_kf_zip_bytes_wraps_transport_errors(self):
    """Transport failures from ``urlopen`` surface as ExternalFactorDownloadError."""
    transport_errors = (
        URLError("boom"),
        TimeoutError("timed out"),
        ConnectionError("conn reset"),
        socket.timeout("socket timed out"),
        ssl.SSLError("tls failed"),
    )
    for error in transport_errors:
        # subTest reports each exception type separately instead of
        # stopping at the first failing one.
        with self.subTest(error_type=type(error).__name__):
            with mock.patch("factor_attribution.urlopen", side_effect=error):
                with self.assertRaises(ExternalFactorDownloadError) as caught:
                    _download_kf_zip_bytes()
            # The wrapper must chain the original error so the root cause
            # stays visible in tracebacks.
            self.assertIs(caught.exception.__cause__, error)
|
||||
|
||||
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
|
||||
csv_text = (
|
||||
"This line is ignored\n"
|
||||
@@ -78,33 +94,7 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
||||
cached.to_csv(cache_dir / "ff5_us_daily.csv")
|
||||
with mock.patch(
|
||||
"factor_attribution._download_kf_zip_bytes",
|
||||
side_effect=URLError("boom"),
|
||||
):
|
||||
with self.assertWarnsRegex(UserWarning, "cached data"):
|
||||
factors = load_external_us_factors(cache_dir=cache_dir)
|
||||
|
||||
self.assertEqual(len(factors), 1)
|
||||
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
|
||||
|
||||
def test_load_external_us_factors_falls_back_to_cache_when_download_raises_oserror(self):
|
||||
cached = pd.DataFrame(
|
||||
{
|
||||
"MKT_RF": [0.01],
|
||||
"SMB": [0.0],
|
||||
"HML": [0.0],
|
||||
"RMW": [0.0],
|
||||
"CMA": [0.0],
|
||||
"RF": [0.0001],
|
||||
},
|
||||
index=pd.to_datetime(["2026-01-02"]),
|
||||
)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
cache_dir = Path(tmpdir)
|
||||
cached.to_csv(cache_dir / "ff5_us_daily.csv")
|
||||
with mock.patch(
|
||||
"factor_attribution._download_kf_zip_bytes",
|
||||
side_effect=OSError("transport reset"),
|
||||
side_effect=ExternalFactorDownloadError("boom"),
|
||||
):
|
||||
with self.assertWarnsRegex(UserWarning, "cached data"):
|
||||
factors = load_external_us_factors(cache_dir=cache_dir)
|
||||
|
||||
Reference in New Issue
Block a user