diff --git a/factor_attribution.py b/factor_attribution.py index cee869f..c851457 100644 --- a/factor_attribution.py +++ b/factor_attribution.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import http.client import io import socket @@ -54,6 +55,7 @@ SUMMARY_COLUMNS = [ "model", "factor_source", "proxy_only", + "beta_semantics", "start_date", "end_date", "n_obs", @@ -436,6 +438,32 @@ def _resolve_benchmark_symbol(benchmark: str | None) -> str: return benchmark +def _beta_semantics_map(proxy_only: bool) -> dict[str, str]: + return { + "beta_mkt": "MKT" if proxy_only else "MKT_RF", + "beta_smb": "SMB_PROXY" if proxy_only else "SMB", + "beta_hml": "HML_PROXY" if proxy_only else "HML", + "beta_rmw": "RMW_PROXY" if proxy_only else "RMW", + "beta_cma": "CMA_PROXY" if proxy_only else "CMA", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } + + +def _parse_beta_semantics(row: pd.Series) -> dict[str, str]: + raw_value = row.get("beta_semantics") + if isinstance(raw_value, str) and raw_value: + try: + parsed = json.loads(raw_value) + except json.JSONDecodeError: + parsed = None + else: + if isinstance(parsed, dict): + return {str(key): str(value) for key, value in parsed.items()} + return _beta_semantics_map(bool(row.get("proxy_only", False))) + + def attribute_strategies( results_df: pd.DataFrame, benchmark_label: str, @@ -507,6 +535,7 @@ def attribute_strategies( "model": model_name, "factor_source": prepared["factor_source"], "proxy_only": prepared["proxy_only"], + "beta_semantics": json.dumps(_beta_semantics_map(bool(prepared["proxy_only"])), sort_keys=True), "start_date": regression_result["start_date"], "end_date": regression_result["end_date"], "n_obs": regression_result["n_obs"], @@ -580,28 +609,7 @@ def _describe_fit(r_squared: float) -> str: def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str: beta_columns = [column for column in row.index if column.startswith("beta_")] - if bool(row.get("proxy_only", False)): - factor_labels = { - "beta_mkt": "MKT", - "beta_smb": "SMB_PROXY", - "beta_hml": "HML_PROXY", - "beta_rmw": "RMW_PROXY", - "beta_cma": "CMA_PROXY", - "beta_mom": "MOM", - "beta_lowvol": "LOWVOL", - "beta_recovery": "RECOVERY", - } - else: - factor_labels = { - "beta_mkt": "MKT", - "beta_smb": "SMB", - "beta_hml": "HML", - "beta_rmw": "RMW", - "beta_cma": "CMA", - "beta_mom": "MOM", - "beta_lowvol": "LOWVOL", - "beta_recovery": "RECOVERY", - } + factor_labels = _parse_beta_semantics(row) present = [] for column in beta_columns: value = row.get(column) @@ -616,11 +624,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str: return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings) -def print_attribution_summary(summary_df: pd.DataFrame) -> None: - if summary_df.empty: - print("Factor attribution: no usable regressions were produced.") - return - +def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None: display_columns = [ "strategy", "market", @@ -640,7 +644,7 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None: "beta_recovery", ] table = summary_df.reindex(columns=display_columns).copy() - if bool(table["proxy_only"].fillna(False).all()): + if proxy_labels: table = table.rename( columns={ "beta_smb": "beta_smb_proxy", @@ -656,8 +660,32 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None: ] table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4) - print("\nFactor attribution") + print(f"\n{title}") print(table.to_string(index=False, na_rep="")) + + +def print_attribution_summary(summary_df: pd.DataFrame) -> None: + if summary_df.empty: + print("Factor attribution: no usable regressions were produced.") + return + + proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool) + standard_rows = summary_df.loc[~proxy_mask] + proxy_rows = summary_df.loc[proxy_mask] + + print("\nFactor attribution") + if not standard_rows.empty: + _print_attribution_section( + standard_rows, + title="Standard factor attribution", + proxy_labels=False, + ) + if not proxy_rows.empty: + _print_attribution_section( + proxy_rows, + title="Proxy factor attribution", + proxy_labels=True, + ) print("\nInterpretation") for _, row in summary_df.iterrows(): print( diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py index d172ed7..e9396b7 100644 --- a/tests/test_factor_attribution.py +++ b/tests/test_factor_attribution.py @@ -1,5 +1,6 @@ import http.client import contextlib +import json import io import socket import ssl @@ -640,6 +641,7 @@ class AttributionIntegrationTests(unittest.TestCase): "model", "factor_source", "proxy_only", + "beta_semantics", "start_date", "end_date", "n_obs", @@ -664,6 +666,19 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertEqual(summary.loc[0, "model"], "ff5") self.assertEqual(summary.loc[0, "factor_source"], "external+local") self.assertFalse(bool(summary.loc[0, "proxy_only"])) + self.assertEqual( + json.loads(summary.loc[0, "beta_semantics"]), + { + "beta_mkt": "MKT_RF", + "beta_smb": "SMB", + "beta_hml": "HML", + "beta_rmw": "RMW", + "beta_cma": "CMA", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + }, + ) self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3) self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3) self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3) @@ -704,6 +719,19 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertEqual(summary.loc[0, "model"], "proxy") self.assertEqual(summary.loc[0, "factor_source"], "proxy_only") self.assertTrue(bool(summary.loc[0, "proxy_only"])) + self.assertEqual( + json.loads(summary.loc[0, "beta_semantics"]), + { + "beta_mkt": "MKT", + "beta_smb": "SMB_PROXY", + "beta_hml": "HML_PROXY", + "beta_rmw": "RMW_PROXY", + "beta_cma": "CMA_PROXY", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + }, + ) self.assertNotIn("beta_smb_proxy", summary.columns) self.assertNotIn("beta_hml_proxy", summary.columns) self.assertNotIn("beta_rmw_proxy", summary.columns) @@ -716,6 +744,13 @@ class AttributionIntegrationTests(unittest.TestCase): set(loadings["factor"]), {"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"}, ) + loadings_by_factor = loadings.set_index("factor")["beta"] + semantics = json.loads(summary.loc[0, "beta_semantics"]) + self.assertAlmostEqual(summary.loc[0, "beta_mkt"], loadings_by_factor[semantics["beta_mkt"]], places=10) + self.assertAlmostEqual(summary.loc[0, "beta_smb"], loadings_by_factor[semantics["beta_smb"]], places=10) + self.assertAlmostEqual(summary.loc[0, "beta_hml"], loadings_by_factor[semantics["beta_hml"]], places=10) + self.assertAlmostEqual(summary.loc[0, "beta_rmw"], loadings_by_factor[semantics["beta_rmw"]], places=10) + self.assertAlmostEqual(summary.loc[0, "beta_cma"], loadings_by_factor[semantics["beta_cma"]], places=10) def test_attribute_strategies_without_benchmark_uses_equal_weight_proxy_market(self): dates = pd.date_range("2025-01-01", periods=320, freq="B") @@ -802,6 +837,18 @@ class AttributionIntegrationTests(unittest.TestCase): "model": "proxy", "factor_source": "proxy_only", "proxy_only": True, + "beta_semantics": json.dumps( + { + "beta_mkt": "MKT", + "beta_smb": "SMB_PROXY", + "beta_hml": "HML_PROXY", + "beta_rmw": "RMW_PROXY", + "beta_cma": "CMA_PROXY", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } + ), "start_date": "2025-01-02", "end_date": "2026-03-24", "n_obs": 319, @@ -834,6 +881,96 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertIn("SMB_PROXY", output) self.assertNotIn(" beta_smb ", output) + def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self): + summary = pd.DataFrame( + [ + { + "strategy": "US Strategy", + "market": "us", + "model": "ff5", + "factor_source": "external+local", + "proxy_only": False, + "beta_semantics": json.dumps( + { + "beta_mkt": "MKT_RF", + "beta_smb": "SMB", + "beta_hml": "HML", + "beta_rmw": "RMW", + "beta_cma": "CMA", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } + ), + "start_date": "2025-01-02", + "end_date": "2026-03-24", + "n_obs": 319, + "alpha_daily": 0.0004, + "alpha_ann": 0.1008, + "alpha_t_stat": 2.1, + "alpha_p_value": 0.04, + "r_squared": 0.82, + "adj_r_squared": 0.81, + "residual_vol_ann": 0.12, + "beta_mkt": 1.05, + "beta_smb": -0.20, + "beta_hml": 0.30, + "beta_rmw": 0.05, + "beta_cma": np.nan, + "beta_mom": np.nan, + "beta_lowvol": np.nan, + "beta_recovery": np.nan, + }, + { + "strategy": "CN Strategy", + "market": "cn", + "model": "proxy", + "factor_source": "proxy_only", + "proxy_only": True, + "beta_semantics": json.dumps( + { + "beta_mkt": "MKT", + "beta_smb": "SMB_PROXY", + "beta_hml": "HML_PROXY", + "beta_rmw": "RMW_PROXY", + "beta_cma": "CMA_PROXY", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } + ), + "start_date": "2025-01-02", + "end_date": "2026-03-24", + "n_obs": 319, + "alpha_daily": 0.0002, + "alpha_ann": 0.0504, + "alpha_t_stat": 1.5, + "alpha_p_value": 0.12, + "r_squared": 0.72, + "adj_r_squared": 0.70, + "residual_vol_ann": 0.14, + "beta_mkt": 0.85, + "beta_smb": -0.30, + "beta_hml": 0.25, + "beta_rmw": 0.10, + "beta_cma": -0.05, + "beta_mom": 0.20, + "beta_lowvol": np.nan, + "beta_recovery": np.nan, + }, + ] + ) + + buffer = io.StringIO() + with contextlib.redirect_stdout(buffer): + print_attribution_summary(summary) + + output = buffer.getvalue() + self.assertIn("Standard factor attribution", output) + self.assertIn("Proxy factor attribution", output) + self.assertIn("beta_smb_proxy", output) + self.assertIn("beta_smb ", output) + def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame: steps = np.arange(len(dates), dtype=float) data = {}