From 2382364a46ab3789dc79fde20a3e703face33254 Mon Sep 17 00:00:00 2001
From: Gahow Wang
Date: Tue, 7 Apr 2026 16:12:00 +0800
Subject: [PATCH] Handle HTTP protocol errors in factor download

---
Review notes (this region is ignored by `git am`):

* factor_attribution.py: adds http.client.HTTPException to the exception
  tuple caught in _download_kf_zip_bytes().
* tests/test_factor_attribution.py: adds a test that patches
  factor_attribution.urlopen to raise http.client.BadStatusLine and expects
  load_external_us_factors() to emit a UserWarning matching "cached data"
  and to return the single cached row.
* NOTE(review): http.client.IncompleteRead and
  http.client.RemoteDisconnected are subclasses of
  http.client.HTTPException, so they are now redundant in the tuple
  (harmless; kept as-is by this patch).
* NOTE(review): the test hunk adds no import; presumably http.client is
  already imported in the test module -- confirm.
* NOTE(review): leading whitespace inside the hunks below was reconstructed
  using conventional 4-space nesting -- verify against the target files
  before applying.

 factor_attribution.py            |  1 +
 tests/test_factor_attribution.py | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/factor_attribution.py b/factor_attribution.py
index a5439a4..6e79476 100644
--- a/factor_attribution.py
+++ b/factor_attribution.py
@@ -43,6 +43,7 @@ def _download_kf_zip_bytes() -> bytes:
         socket.timeout,
         socket.gaierror,
         ssl.SSLError,
+        http.client.HTTPException,
         http.client.IncompleteRead,
         http.client.RemoteDisconnected,
     ) as exc:
diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py
index dc0fc07..279c553 100644
--- a/tests/test_factor_attribution.py
+++ b/tests/test_factor_attribution.py
@@ -138,6 +138,29 @@ class ExternalFactorLoaderTests(unittest.TestCase):
         self.assertEqual(len(factors), 1)
         self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
 
+    def test_load_external_us_factors_falls_back_to_cache_when_http_status_line_is_bad(self):
+        cached = pd.DataFrame(
+            {
+                "MKT_RF": [0.01],
+                "SMB": [0.0],
+                "HML": [0.0],
+                "RMW": [0.0],
+                "CMA": [0.0],
+                "RF": [0.0001],
+            },
+            index=pd.to_datetime(["2026-01-02"]),
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            cache_dir = Path(tmpdir)
+            cached.to_csv(cache_dir / "ff5_us_daily.csv")
+            with mock.patch("factor_attribution.urlopen", side_effect=http.client.BadStatusLine("HTTP/1.1 ???")):
+                with self.assertWarnsRegex(UserWarning, "cached data"):
+                    factors = load_external_us_factors(cache_dir=cache_dir)
+
+        self.assertEqual(len(factors), 1)
+        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
+
     def test_parse_kf_daily_csv_raises_external_factor_format_error_for_missing_header(self):
         zip_bytes = self._make_zip_bytes(
             "F-F_Research_Data_5_Factors_2x3_daily.csv",