Harden factor loader zip parsing and fallback

This commit is contained in:
2026-04-07 15:38:49 +08:00
parent feb1864a4d
commit e70922d9af
2 changed files with 55 additions and 8 deletions

View File

@@ -1,7 +1,9 @@
from __future__ import annotations
import io
import zipfile
from pathlib import Path
from urllib.error import URLError
import pandas as pd
@@ -11,7 +13,14 @@ def _download_kf_zip_bytes() -> bytes:
def _parse_kf_daily_csv(raw_bytes: bytes) -> pd.DataFrame:
text = raw_bytes.decode("utf-8")
with zipfile.ZipFile(io.BytesIO(raw_bytes)) as archive:
member_name = next(
name
for name in archive.namelist()
if not name.endswith("/") and name.lower().endswith((".csv", ".txt"))
)
text = archive.read(member_name).decode("utf-8-sig")
lines = [line for line in text.splitlines() if line.strip()]
header_index = next(i for i, line in enumerate(lines) if "Mkt-RF" in line)
table = "\n".join(lines[header_index:])
@@ -31,10 +40,12 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
cache_path.parent.mkdir(parents=True, exist_ok=True)
try:
factors = _parse_kf_daily_csv(_download_kf_zip_bytes())
factors.to_csv(cache_path)
return factors
except Exception:
raw_bytes = _download_kf_zip_bytes()
except (URLError, TimeoutError, ConnectionError, OSError):
if cache_path.exists():
return pd.read_csv(cache_path, index_col=0, parse_dates=True)
raise
factors = _parse_kf_daily_csv(raw_bytes)
factors.to_csv(cache_path)
return factors