Integrate factor attribution into backtest CLI
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import http.client
|
||||
import contextlib
|
||||
import io
|
||||
import socket
|
||||
import ssl
|
||||
@@ -18,9 +19,12 @@ from factor_attribution import (
|
||||
KEN_FRENCH_DAILY_FF5_ZIP_URL,
|
||||
_download_kf_zip_bytes,
|
||||
_parse_kf_daily_csv,
|
||||
attribute_strategies,
|
||||
build_extension_factors,
|
||||
build_proxy_core_factors,
|
||||
export_attribution,
|
||||
load_external_us_factors,
|
||||
print_attribution_summary,
|
||||
prepare_factor_models,
|
||||
run_factor_regression,
|
||||
)
|
||||
@@ -573,3 +577,194 @@ class RegressionTests(unittest.TestCase):
|
||||
list(prepared["factor_frame"].columns),
|
||||
["MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"],
|
||||
)
|
||||
|
||||
|
||||
class AttributionIntegrationTests(unittest.TestCase):
    """Integration tests for strategy attribution, its CSV export, and the printed summary."""

    def test_attribute_strategies_exports_standard_model_summary_and_loadings(self):
        # Synthetic factor frame: each factor is a sinusoid with its own
        # frequency/phase, plus a constant "RF" column.
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        angles = np.linspace(0.0, 24.0, len(dates))
        factors = pd.DataFrame(
            {
                "MKT_RF": 0.010 * np.sin(angles),
                "SMB": 0.006 * np.cos(angles * 0.7),
                "HML": 0.004 * np.sin(angles * 1.3 + 0.4),
                "RMW": 0.003 * np.cos(angles * 1.1 + 0.2),
                "CMA": 0.002 * np.sin(angles * 0.5 + 0.8),
                "RF": np.full(len(dates), 0.0001),
            },
            index=dates,
        )

        # The strategy return is an exact linear combination of the factors
        # (plus a constant and RF), so the coefficients used here are the
        # betas asserted further down.
        strategy_returns = (
            0.0004
            + 1.10 * factors["MKT_RF"]
            - 0.25 * factors["SMB"]
            + 0.35 * factors["HML"]
            + 0.10 * factors["RMW"]
            - 0.05 * factors["CMA"]
            + factors["RF"]
        )
        benchmark_returns = 0.95 * factors["MKT_RF"] + factors["RF"]

        # Equity curves starting from 100,000, one column per strategy/benchmark.
        results = pd.DataFrame(
            {
                "Strategy": 100_000.0 * (1.0 + strategy_returns).cumprod(),
                "SPY (Benchmark)": 100_000.0 * (1.0 + benchmark_returns).cumprod(),
            },
            index=dates,
        )
        prices = self._make_price_frame(dates, benchmark="SPY")

        with tempfile.TemporaryDirectory() as tmpdir:
            summary, loadings = attribute_strategies(
                results_df=results,
                benchmark_label="SPY (Benchmark)",
                benchmark="SPY",
                price_data=prices,
                market="us",
                model_selection="ff5",
                external_factors=factors,
            )
            export_attribution(summary, loadings, tmpdir)

            # The exported files must be checked and read back while the
            # temporary directory still exists.
            self.assertTrue((Path(tmpdir) / "summary.csv").exists())
            self.assertTrue((Path(tmpdir) / "loadings.csv").exists())

            exported_summary = pd.read_csv(Path(tmpdir) / "summary.csv")
            exported_loadings = pd.read_csv(Path(tmpdir) / "loadings.csv")

        self.assertEqual(len(summary), 1)
        self.assertListEqual(
            list(summary.columns),
            [
                "strategy",
                "market",
                "model",
                "factor_source",
                "proxy_only",
                "start_date",
                "end_date",
                "n_obs",
                "alpha_daily",
                "alpha_ann",
                "alpha_t_stat",
                "alpha_p_value",
                "r_squared",
                "adj_r_squared",
                "residual_vol_ann",
                "beta_mkt",
                "beta_smb",
                "beta_hml",
                "beta_rmw",
                "beta_cma",
                "beta_mom",
                "beta_lowvol",
                "beta_recovery",
            ],
        )
        self.assertEqual(summary.loc[0, "strategy"], "Strategy")
        self.assertEqual(summary.loc[0, "model"], "ff5")
        self.assertEqual(summary.loc[0, "factor_source"], "external+local")
        self.assertFalse(bool(summary.loc[0, "proxy_only"]))
        self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
        self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
        self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
        # Under the "ff5" selection the momentum beta is expected to be NaN.
        self.assertTrue(np.isnan(summary.loc[0, "beta_mom"]))

        self.assertListEqual(
            list(loadings.columns),
            ["strategy", "market", "model", "factor_source", "proxy_only", "factor", "beta", "t_stat", "p_value"],
        )
        self.assertEqual(set(loadings["factor"]), {"MKT_RF", "SMB", "HML", "RMW", "CMA"})
        self.assertEqual(len(loadings), 5)

        # The CSV round-trip must reproduce the in-memory frames (dtypes aside).
        pd.testing.assert_frame_equal(summary, exported_summary, check_dtype=False)
        pd.testing.assert_frame_equal(loadings, exported_loadings, check_dtype=False)

    def test_attribute_strategies_uses_proxy_model_for_cn_runs(self):
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        prices = self._make_price_frame(dates, benchmark="000300.SS")

        # Benchmark daily returns are derived once and reused for both columns.
        benchmark_returns = prices["000300.SS"].pct_change().fillna(0.0)
        returns = benchmark_returns * 0.7 + 0.0002
        results = pd.DataFrame(
            {
                "Strategy": 100_000.0 * (1.0 + returns).cumprod(),
                "CSI 300 (Benchmark)": 100_000.0 * (1.0 + benchmark_returns).cumprod(),
            },
            index=dates,
        )

        # "ff5" is requested, but with market="cn" and no external factors the
        # assertions below expect the proxy model to be reported instead.
        summary, loadings = attribute_strategies(
            results_df=results,
            benchmark_label="CSI 300 (Benchmark)",
            benchmark="000300.SS",
            price_data=prices,
            market="cn",
            model_selection="ff5",
            external_factors=None,
        )

        self.assertEqual(len(summary), 1)
        self.assertEqual(summary.loc[0, "model"], "proxy")
        self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
        self.assertTrue(bool(summary.loc[0, "proxy_only"]))
        self.assertEqual(
            set(loadings["factor"]),
            {"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
        )

    def test_print_attribution_summary_prints_compact_table_and_interpretation(self):
        # A single hand-written summary row; several betas are NaN on purpose.
        summary = pd.DataFrame(
            [
                {
                    "strategy": "Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        # Capture everything the function writes to stdout.
        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Factor attribution", output)
        self.assertIn("Strategy", output)
        self.assertIn("ff5", output)
        self.assertIn("alpha_ann", output)
        self.assertIn("Interpretation", output)

    def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
        """Build a deterministic synthetic price frame indexed by ``dates``.

        Six fixed symbols ("AAA" .. "FFF") plus one column named ``benchmark``
        are generated as ``base * exp(drift * t + amplitude * sin(t / frequency + phase))``
        where ``t`` is the row position.

        Args:
            dates: Index for the returned frame; its length sets the row count.
            benchmark: Column name used for the benchmark price series.

        Returns:
            A DataFrame with seven price columns and ``dates`` as its index.
        """
        steps = np.arange(len(dates), dtype=float)
        data = {}
        for symbol, base, drift, amplitude, frequency, phase in (
            ("AAA", 45.0, 0.0005, 0.030, 19.0, 0.1),
            ("BBB", 60.0, 0.0002, 0.025, 23.0, 0.8),
            ("CCC", 75.0, -0.0001, 0.035, 17.0, 1.4),
            ("DDD", 90.0, 0.0007, 0.020, 29.0, 0.5),
            ("EEE", 55.0, -0.0002, 0.028, 31.0, 1.9),
            ("FFF", 70.0, 0.0004, 0.032, 21.0, 2.5),
        ):
            log_path = drift * steps + amplitude * np.sin(steps / frequency + phase)
            data[symbol] = base * np.exp(log_path)

        # The benchmark follows the same functional form with its own constants.
        benchmark_path = 0.0004 * steps + 0.018 * np.sin(steps / 27.0 + 0.3)
        data[benchmark] = 250.0 * np.exp(benchmark_path)
        return pd.DataFrame(data, index=dates)
|
||||
|
||||
Reference in New Issue
Block a user