Implement Ken French factor download and cache fallback

This commit is contained in:
2026-04-07 15:44:46 +08:00
parent e70922d9af
commit 9e6da727a3
2 changed files with 73 additions and 6 deletions

View File

@@ -4,12 +4,32 @@ import io
import zipfile
from pathlib import Path
from urllib.error import URLError
from urllib.request import Request, urlopen
import pandas as pd
# URL of the ZIP archive holding the Fama-French 5-factor (2x3) daily CSV in
# Ken French's data library; this is the address requested by
# `_download_kf_zip_bytes`.
KEN_FRENCH_DAILY_FF5_ZIP_URL = (
"https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
"F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"
)
# Exception types treated as "the downloaded payload could not be parsed".
# `load_external_us_factors` catches this tuple around `_parse_kf_daily_csv`
# and returns the cached CSV if one exists, otherwise re-raises.
SOURCE_PARSE_EXCEPTIONS = (
zipfile.BadZipFile,
UnicodeDecodeError,
StopIteration,
KeyError,
ValueError,
pd.errors.ParserError,
)
def _download_kf_zip_bytes() -> bytes:
    """Download the Ken French daily five-factor ZIP archive.

    Sends a GET request to ``KEN_FRENCH_DAILY_FF5_ZIP_URL`` with an explicit
    ``User-Agent`` header and a 30-second timeout, then reads the whole
    response body into memory.

    Returns:
        The raw bytes of the response body (expected to be a ZIP archive;
        the content is not validated here).

    Raises:
        urllib.error.URLError: Raised by ``urlopen`` when the request fails.
            Other network-level errors (e.g. a socket timeout while reading)
            may also propagate unchanged.
    """
    request = Request(
        KEN_FRENCH_DAILY_FF5_ZIP_URL,
        headers={"User-Agent": "quant-factor-attribution/0.1"},
    )
    # The context manager closes the HTTP response even if read() raises.
    with urlopen(request, timeout=30) as response:
        return response.read()
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
@@ -46,6 +66,12 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
raise
factors = _parse_kf_daily_csv(raw_bytes)
try:
factors = _parse_kf_daily_csv(raw_bytes)
except SOURCE_PARSE_EXCEPTIONS:
if cache_path.exists():
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
raise
factors.to_csv(cache_path)
return factors