Implement Ken French factor download and cache fallback
This commit is contained in:
@@ -4,12 +4,32 @@ import io
|
|||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.error import URLError
|
from urllib.error import URLError
|
||||||
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
KEN_FRENCH_DAILY_FF5_ZIP_URL = (
|
||||||
|
"https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
|
||||||
|
"F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"
|
||||||
|
)
|
||||||
|
|
||||||
|
SOURCE_PARSE_EXCEPTIONS = (
|
||||||
|
zipfile.BadZipFile,
|
||||||
|
UnicodeDecodeError,
|
||||||
|
StopIteration,
|
||||||
|
KeyError,
|
||||||
|
ValueError,
|
||||||
|
pd.errors.ParserError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _download_kf_zip_bytes() -> bytes:
|
def _download_kf_zip_bytes() -> bytes:
|
||||||
raise NotImplementedError
|
request = Request(
|
||||||
|
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||||
|
headers={"User-Agent": "quant-factor-attribution/0.1"},
|
||||||
|
)
|
||||||
|
with urlopen(request, timeout=30) as response:
|
||||||
|
return response.read()
|
||||||
|
|
||||||
|
|
||||||
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
||||||
@@ -46,6 +66,12 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
|
|||||||
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
|
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
factors = _parse_kf_daily_csv(raw_bytes)
|
try:
|
||||||
|
factors = _parse_kf_daily_csv(raw_bytes)
|
||||||
|
except SOURCE_PARSE_EXCEPTIONS:
|
||||||
|
if cache_path.exists():
|
||||||
|
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
|
||||||
|
raise
|
||||||
|
|
||||||
factors.to_csv(cache_path)
|
factors.to_csv(cache_path)
|
||||||
return factors
|
return factors
|
||||||
|
|||||||
@@ -8,10 +8,28 @@ from unittest import mock
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from factor_attribution import load_external_us_factors
|
from factor_attribution import (
|
||||||
|
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||||
|
_download_kf_zip_bytes,
|
||||||
|
load_external_us_factors,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ExternalFactorLoaderTests(unittest.TestCase):
|
class ExternalFactorLoaderTests(unittest.TestCase):
|
||||||
|
def test_download_kf_zip_bytes_fetches_official_ken_french_zip(self):
|
||||||
|
response = mock.MagicMock()
|
||||||
|
response.read.return_value = b"zip-bytes"
|
||||||
|
response.__enter__.return_value = response
|
||||||
|
response.__exit__.return_value = False
|
||||||
|
|
||||||
|
with mock.patch("factor_attribution.urlopen", return_value=response) as mocked_urlopen:
|
||||||
|
raw_bytes = _download_kf_zip_bytes()
|
||||||
|
|
||||||
|
self.assertEqual(raw_bytes, b"zip-bytes")
|
||||||
|
request = mocked_urlopen.call_args.args[0]
|
||||||
|
self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
|
||||||
|
self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)
|
||||||
|
|
||||||
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
|
def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
|
||||||
csv_text = (
|
csv_text = (
|
||||||
"This line is ignored\n"
|
"This line is ignored\n"
|
||||||
@@ -65,7 +83,7 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
|||||||
self.assertEqual(len(factors), 1)
|
self.assertEqual(len(factors), 1)
|
||||||
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
|
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
|
||||||
|
|
||||||
def test_load_external_us_factors_raises_parse_errors_instead_of_using_cache(self):
|
def test_load_external_us_factors_falls_back_to_cache_when_parse_fails(self):
|
||||||
cached = pd.DataFrame(
|
cached = pd.DataFrame(
|
||||||
{
|
{
|
||||||
"MKT_RF": [0.01],
|
"MKT_RF": [0.01],
|
||||||
@@ -85,8 +103,31 @@ class ExternalFactorLoaderTests(unittest.TestCase):
|
|||||||
"factor_attribution._download_kf_zip_bytes",
|
"factor_attribution._download_kf_zip_bytes",
|
||||||
return_value=b"not-a-zip-file",
|
return_value=b"not-a-zip-file",
|
||||||
):
|
):
|
||||||
with self.assertRaises(zipfile.BadZipFile):
|
factors = load_external_us_factors(cache_dir=cache_dir)
|
||||||
load_external_us_factors(cache_dir=cache_dir)
|
|
||||||
|
self.assertEqual(len(factors), 1)
|
||||||
|
self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
|
||||||
|
|
||||||
|
def test_load_external_us_factors_surfaces_cache_write_failures(self):
|
||||||
|
csv_text = (
|
||||||
|
"This line is ignored\n"
|
||||||
|
",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
|
||||||
|
"20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
|
||||||
|
"\n"
|
||||||
|
)
|
||||||
|
zip_bytes = self._make_zip_bytes(
|
||||||
|
"F-F_Research_Data_5_Factors_2x3_daily.csv",
|
||||||
|
csv_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
with mock.patch(
|
||||||
|
"factor_attribution._download_kf_zip_bytes",
|
||||||
|
return_value=zip_bytes,
|
||||||
|
):
|
||||||
|
with mock.patch("pandas.DataFrame.to_csv", side_effect=OSError("disk full")):
|
||||||
|
with self.assertRaises(OSError):
|
||||||
|
load_external_us_factors(cache_dir=Path(tmpdir))
|
||||||
|
|
||||||
def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
|
def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
|
|||||||
Reference in New Issue
Block a user