Implement Ken French factor download and cache fallback

This commit is contained in:
2026-04-07 15:44:46 +08:00
parent e70922d9af
commit 9e6da727a3
2 changed files with 73 additions and 6 deletions

View File

@@ -8,10 +8,28 @@ from unittest import mock
import pandas as pd
from factor_attribution import load_external_us_factors
from factor_attribution import (
KEN_FRENCH_DAILY_FF5_ZIP_URL,
_download_kf_zip_bytes,
load_external_us_factors,
)
class ExternalFactorLoaderTests(unittest.TestCase):
def test_download_kf_zip_bytes_fetches_official_ken_french_zip(self):
    """Check that the downloader hits the Ken French FF5 daily ZIP URL.

    ``factor_attribution.urlopen`` is replaced with a mock, so no network
    request is made. The test verifies three things: the bytes returned by
    the fake response's ``read()`` come back unchanged, the first positional
    argument passed to ``urlopen`` exposes the expected ``full_url``, and
    ``urlopen`` received ``timeout=30`` as a keyword argument.
    """
    # The fake response returns itself from __enter__ so it can be used in a
    # ``with`` statement; a falsy __exit__ result means exceptions are not
    # swallowed by the fake context manager.
    fake_response = mock.MagicMock()
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__.return_value = False
    fake_response.read.return_value = b"zip-bytes"

    urlopen_patch = mock.patch(
        "factor_attribution.urlopen",
        return_value=fake_response,
    )
    with urlopen_patch as mocked_urlopen:
        payload = _download_kf_zip_bytes()

    self.assertEqual(payload, b"zip-bytes")

    # The recorded call stays available on the mock after the patch exits.
    recorded_call = mocked_urlopen.call_args
    sent_request = recorded_call.args[0]
    self.assertEqual(sent_request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
    self.assertEqual(recorded_call.kwargs["timeout"], 30)
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
csv_text = (
"This line is ignored\n"
@@ -65,7 +83,7 @@ class ExternalFactorLoaderTests(unittest.TestCase):
self.assertEqual(len(factors), 1)
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
def test_load_external_us_factors_raises_parse_errors_instead_of_using_cache(self):
def test_load_external_us_factors_falls_back_to_cache_when_parse_fails(self):
cached = pd.DataFrame(
{
"MKT_RF": [0.01],
@@ -85,8 +103,31 @@ class ExternalFactorLoaderTests(unittest.TestCase):
"factor_attribution._download_kf_zip_bytes",
return_value=b"not-a-zip-file",
):
with self.assertRaises(zipfile.BadZipFile):
load_external_us_factors(cache_dir=cache_dir)
factors = load_external_us_factors(cache_dir=cache_dir)
self.assertEqual(len(factors), 1)
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
def test_load_external_us_factors_surfaces_cache_write_failures(self):
    """Check that an ``OSError`` raised by ``DataFrame.to_csv`` propagates.

    The download step is patched to return a well-formed in-memory ZIP
    holding a single factor row, so parsing is given valid input. Every
    ``pandas.DataFrame.to_csv`` call is patched to raise
    ``OSError("disk full")``, and the loader is expected to let that error
    reach the caller.
    """
    # NOTE(review): presumably the loader persists its cache via
    # DataFrame.to_csv; confirm against factor_attribution.
    payload_csv = "".join(
        [
            "This line is ignored\n",
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n",
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n",
            "\n",
        ]
    )
    archive = self._make_zip_bytes(
        "F-F_Research_Data_5_Factors_2x3_daily.csv",
        payload_csv,
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        download_patch = mock.patch(
            "factor_attribution._download_kf_zip_bytes",
            return_value=archive,
        )
        write_patch = mock.patch(
            "pandas.DataFrame.to_csv",
            side_effect=OSError("disk full"),
        )
        # Context managers are entered left to right, so assertRaises is the
        # innermost one and sees the OSError before either patch is undone.
        with download_patch, write_patch, self.assertRaises(OSError):
            load_external_us_factors(cache_dir=Path(tmpdir))
def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
buffer = io.BytesIO()