Use explicit download errors for factor loader fallback
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
import socket
|
||||||
|
import ssl
|
||||||
import warnings
|
import warnings
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -21,13 +23,20 @@ class ExternalFactorFormatError(ValueError):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalFactorDownloadError(OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _download_kf_zip_bytes() -> bytes:
|
def _download_kf_zip_bytes() -> bytes:
|
||||||
request = Request(
|
request = Request(
|
||||||
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||||
headers={"User-Agent": "quant-factor-attribution/0.1"},
|
headers={"User-Agent": "quant-factor-attribution/0.1"},
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
with urlopen(request, timeout=30) as response:
|
with urlopen(request, timeout=30) as response:
|
||||||
return response.read()
|
return response.read()
|
||||||
|
except (URLError, TimeoutError, ConnectionError, socket.timeout, ssl.SSLError) as exc:
|
||||||
|
raise ExternalFactorDownloadError(f"Failed to download external factor data: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
||||||
@@ -99,7 +108,7 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
raw_bytes = _download_kf_zip_bytes()
|
raw_bytes = _download_kf_zip_bytes()
|
||||||
except (URLError, TimeoutError, ConnectionError, OSError) as exc:
|
except ExternalFactorDownloadError as exc:
|
||||||
if cache_path.exists():
|
if cache_path.exists():
|
||||||
return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}")
|
return _warn_and_load_cached_factors(cache_path, f"download failed: {exc}")
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
import io
|
import io
|
||||||
|
import socket
|
||||||
|
import ssl
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
import zipfile
|
import zipfile
|
||||||
@@ -9,6 +11,7 @@ from unittest import mock
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from factor_attribution import (
|
from factor_attribution import (
|
||||||
|
ExternalFactorDownloadError,
|
||||||
ExternalFactorFormatError,
|
ExternalFactorFormatError,
|
||||||
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||||
_download_kf_zip_bytes,
|
_download_kf_zip_bytes,
|
||||||
@@ -32,6 +35,19 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
|||||||
self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
|
self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
|
||||||
self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)
|
self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)
|
||||||
|
|
||||||
|
def test_download_kf_zip_bytes_wraps_transport_errors(self):
|
||||||
|
for error in (
|
||||||
|
URLError("boom"),
|
||||||
|
TimeoutError("timed out"),
|
||||||
|
ConnectionError("conn reset"),
|
||||||
|
socket.timeout("socket timed out"),
|
||||||
|
ssl.SSLError("tls failed"),
|
||||||
|
):
|
||||||
|
with self.subTest(error_type=type(error).__name__):
|
||||||
|
with mock.patch("factor_attribution.urlopen", side_effect=error):
|
||||||
|
with self.assertRaises(ExternalFactorDownloadError):
|
||||||
|
_download_kf_zip_bytes()
|
||||||
|
|
||||||
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
|
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
|
||||||
csv_text = (
|
csv_text = (
|
||||||
"This line is ignored\n"
|
"This line is ignored\n"
|
||||||
@@ -78,33 +94,7 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
|||||||
cached.to_csv(cache_dir / "ff5_us_daily.csv")
|
cached.to_csv(cache_dir / "ff5_us_daily.csv")
|
||||||
with mock.patch(
|
with mock.patch(
|
||||||
"factor_attribution._download_kf_zip_bytes",
|
"factor_attribution._download_kf_zip_bytes",
|
||||||
side_effect=URLError("boom"),
|
side_effect=ExternalFactorDownloadError("boom"),
|
||||||
):
|
|
||||||
with self.assertWarnsRegex(UserWarning, "cached data"):
|
|
||||||
factors = load_external_us_factors(cache_dir=cache_dir)
|
|
||||||
|
|
||||||
self.assertEqual(len(factors), 1)
|
|
||||||
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
|
|
||||||
|
|
||||||
def test_load_external_us_factors_falls_back_to_cache_when_download_raises_oserror(self):
|
|
||||||
cached = pd.DataFrame(
|
|
||||||
{
|
|
||||||
"MKT_RF": [0.01],
|
|
||||||
"SMB": [0.0],
|
|
||||||
"HML": [0.0],
|
|
||||||
"RMW": [0.0],
|
|
||||||
"CMA": [0.0],
|
|
||||||
"RF": [0.0001],
|
|
||||||
},
|
|
||||||
index=pd.to_datetime(["2026-01-02"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
cache_dir = Path(tmpdir)
|
|
||||||
cached.to_csv(cache_dir / "ff5_us_daily.csv")
|
|
||||||
with mock.patch(
|
|
||||||
"factor_attribution._download_kf_zip_bytes",
|
|
||||||
side_effect=OSError("transport reset"),
|
|
||||||
):
|
):
|
||||||
with self.assertWarnsRegex(UserWarning, "cached data"):
|
with self.assertWarnsRegex(UserWarning, "cached data"):
|
||||||
factors = load_external_us_factors(cache_dir=cache_dir)
|
factors = load_external_us_factors(cache_dir=cache_dir)
|
||||||
|
|||||||
Reference in New Issue
Block a user