Implement Ken French factor download and cache fallback
This commit is contained in:
@@ -4,12 +4,32 @@ import io
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from urllib.error import URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
import pandas as pd
|
||||
|
||||
KEN_FRENCH_DAILY_FF5_ZIP_URL = (
|
||||
"https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
|
||||
"F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"
|
||||
)
|
||||
|
||||
SOURCE_PARSE_EXCEPTIONS = (
|
||||
zipfile.BadZipFile,
|
||||
UnicodeDecodeError,
|
||||
StopIteration,
|
||||
KeyError,
|
||||
ValueError,
|
||||
pd.errors.ParserError,
|
||||
)
|
||||
|
||||
|
||||
def _download_kf_zip_bytes() -> bytes:
    """Fetch the Ken French daily five-factor ZIP archive and return its raw bytes.

    The request targets ``KEN_FRENCH_DAILY_FF5_ZIP_URL``, sends an explicit
    ``User-Agent`` header, and uses a 30-second timeout.

    Returns:
        The undecoded response body (the ZIP file contents).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails; no error handling is done here, so the caller decides
            whether to fall back to cached data.
    """
    request = Request(
        KEN_FRENCH_DAILY_FF5_ZIP_URL,
        headers={"User-Agent": "quant-factor-attribution/0.1"},
    )
    # The context manager closes the HTTP response even if read() raises.
    with urlopen(request, timeout=30) as response:
        return response.read()
|
||||
|
||||
|
||||
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
|
||||
@@ -46,6 +66,12 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
|
||||
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
|
||||
raise
|
||||
|
||||
factors = _parse_kf_daily_csv(raw_bytes)
|
||||
try:
|
||||
factors = _parse_kf_daily_csv(raw_bytes)
|
||||
except SOURCE_PARSE_EXCEPTIONS:
|
||||
if cache_path.exists():
|
||||
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
|
||||
raise
|
||||
|
||||
factors.to_csv(cache_path)
|
||||
return factors
|
||||
|
||||
Reference in New Issue
Block a user