# File: quant/tests/test_factor_attribution.py
# (137 lines, 4.8 KiB, Python)

import io
import tempfile
import unittest
import zipfile
from pathlib import Path
from urllib.error import URLError
from unittest import mock
import pandas as pd
from factor_attribution import (
KEN_FRENCH_DAILY_FF5_ZIP_URL,
_download_kf_zip_bytes,
load_external_us_factors,
)
class ExternalFactorLoaderTests(unittest.TestCase):
    """Tests for downloading, parsing, and cache fallback of external US factors.

    Network access is never performed: ``urlopen`` and ``_download_kf_zip_bytes``
    are patched inside the ``factor_attribution`` module, and every cache
    directory is a throwaway ``tempfile.TemporaryDirectory``.
    """

    # Patch target for the download helper used by ``load_external_us_factors``.
    _DOWNLOAD_TARGET = "factor_attribution._download_kf_zip_bytes"
    # Name of the CSV member placed inside the fake factor archive.
    _ZIP_MEMBER_NAME = "F-F_Research_Data_5_Factors_2x3_daily.csv"
    # File the fallback tests seed inside ``cache_dir`` before calling the loader.
    _CACHE_FILE_NAME = "ff5_us_daily.csv"

    def test_download_kf_zip_bytes_fetches_official_ken_french_zip(self):
        """The downloader returns the response bytes and requests the official URL."""
        response = mock.MagicMock()
        response.read.return_value = b"zip-bytes"
        # Make the mock usable as a context manager that yields itself and
        # does not suppress exceptions (``__exit__`` returns a falsy value).
        response.__enter__.return_value = response
        response.__exit__.return_value = False

        with mock.patch(
            "factor_attribution.urlopen",
            return_value=response,
        ) as mocked_urlopen:
            raw_bytes = _download_kf_zip_bytes()

        self.assertEqual(raw_bytes, b"zip-bytes")
        request = mocked_urlopen.call_args.args[0]
        self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
        self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)

    def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
        """Percent figures are scaled to decimals and ``YYYYMMDD`` keys become dates."""
        zip_bytes = self._make_ff5_zip_bytes(
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02",
            "20260105,-0.20,0.10,0.30,-0.15,0.05,0.02",
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            with mock.patch(self._DOWNLOAD_TARGET, return_value=zip_bytes):
                factors = load_external_us_factors(cache_dir=Path(tmpdir))

        self.assertListEqual(
            list(factors.columns),
            ["MKT_RF", "SMB", "HML", "RMW", "CMA", "RF"],
        )
        first_row = factors.iloc[0]
        self.assertAlmostEqual(first_row["MKT_RF"], 0.01)
        self.assertAlmostEqual(first_row["RF"], 0.0002)
        self.assertEqual(str(factors.index[0].date()), "2026-01-02")

    def test_load_external_us_factors_falls_back_to_cache_when_download_fails(self):
        """A ``URLError`` from the download yields the previously cached factors."""
        self._assert_falls_back_to_cache(side_effect=URLError("boom"))

    def test_load_external_us_factors_falls_back_to_cache_when_parse_fails(self):
        """A payload that is not a zip archive yields the previously cached factors."""
        self._assert_falls_back_to_cache(return_value=b"not-a-zip-file")

    def test_load_external_us_factors_surfaces_cache_write_failures(self):
        """An ``OSError`` raised while writing the cache propagates to the caller."""
        zip_bytes = self._make_ff5_zip_bytes(
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02",
        )
        download_patch = mock.patch(self._DOWNLOAD_TARGET, return_value=zip_bytes)
        failing_write = mock.patch(
            "pandas.DataFrame.to_csv",
            side_effect=OSError("disk full"),
        )

        # Context managers are entered left to right: temp dir, download
        # patch, then the failing ``to_csv`` patch.
        with tempfile.TemporaryDirectory() as tmpdir, download_patch, failing_write:
            with self.assertRaises(OSError):
                load_external_us_factors(cache_dir=Path(tmpdir))

    def _assert_falls_back_to_cache(self, **download_mock_kwargs) -> None:
        """Seed a cache, break the download as configured, and expect cached data.

        Args:
            **download_mock_kwargs: Keyword arguments forwarded to ``mock.patch``
                for the download helper (e.g. ``side_effect`` or ``return_value``).
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            self._seed_cache(cache_dir)
            with mock.patch(self._DOWNLOAD_TARGET, **download_mock_kwargs):
                factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def _seed_cache(self, cache_dir: Path) -> None:
        """Write a one-row factor frame to the cache file inside ``cache_dir``."""
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        cached.to_csv(cache_dir / self._CACHE_FILE_NAME)

    def _make_ff5_zip_bytes(self, *data_rows: str) -> bytes:
        """Return a zip payload whose CSV member holds the given data rows.

        The CSV text is one preamble line, the factor header, each entry of
        ``data_rows`` on its own line, and a trailing blank line.
        """
        csv_lines = [
            "This line is ignored\n",
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n",
        ]
        csv_lines.extend(f"{row}\n" for row in data_rows)
        csv_lines.append("\n")
        return self._make_zip_bytes(self._ZIP_MEMBER_NAME, "".join(csv_lines))

    def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
        """Return the bytes of an in-memory zip archive with a single member.

        Args:
            filename: Name of the member stored in the archive.
            contents: Text written as that member's data.
        """
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, mode="w") as archive:
            archive.writestr(filename, contents)
        return buffer.getvalue()