diff --git a/factor_attribution.py b/factor_attribution.py index aa2d3f7..cee869f 100644 --- a/factor_attribution.py +++ b/factor_attribution.py @@ -37,13 +37,13 @@ SUMMARY_BETA_COLUMN_BY_FACTOR = { "MKT_RF": "beta_mkt", "MKT": "beta_mkt", "SMB": "beta_smb", - "SMB_PROXY": "beta_smb_proxy", + "SMB_PROXY": "beta_smb", "HML": "beta_hml", - "HML_PROXY": "beta_hml_proxy", + "HML_PROXY": "beta_hml", "RMW": "beta_rmw", - "RMW_PROXY": "beta_rmw_proxy", + "RMW_PROXY": "beta_rmw", "CMA": "beta_cma", - "CMA_PROXY": "beta_cma_proxy", + "CMA_PROXY": "beta_cma", "MOM": "beta_mom", "LOWVOL": "beta_lowvol", "RECOVERY": "beta_recovery", @@ -69,10 +69,6 @@ SUMMARY_COLUMNS = [ "beta_hml", "beta_rmw", "beta_cma", - "beta_smb_proxy", - "beta_hml_proxy", - "beta_rmw_proxy", - "beta_cma_proxy", "beta_mom", "beta_lowvol", "beta_recovery", @@ -526,10 +522,6 @@ def attribute_strategies( "beta_hml": np.nan, "beta_rmw": np.nan, "beta_cma": np.nan, - "beta_smb_proxy": np.nan, - "beta_hml_proxy": np.nan, - "beta_rmw_proxy": np.nan, - "beta_cma_proxy": np.nan, "beta_mom": np.nan, "beta_lowvol": np.nan, "beta_recovery": np.nan, @@ -588,11 +580,34 @@ def _describe_fit(r_squared: float) -> str: def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str: beta_columns = [column for column in row.index if column.startswith("beta_")] + if bool(row.get("proxy_only", False)): + factor_labels = { + "beta_mkt": "MKT", + "beta_smb": "SMB_PROXY", + "beta_hml": "HML_PROXY", + "beta_rmw": "RMW_PROXY", + "beta_cma": "CMA_PROXY", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } + else: + factor_labels = { + "beta_mkt": "MKT", + "beta_smb": "SMB", + "beta_hml": "HML", + "beta_rmw": "RMW", + "beta_cma": "CMA", + "beta_mom": "MOM", + "beta_lowvol": "LOWVOL", + "beta_recovery": "RECOVERY", + } present = [] for column in beta_columns: value = row.get(column) - if pd.notna(value): - present.append((column.removeprefix("beta_").upper(), float(value))) + label = factor_labels.get(column) + if label is not None and pd.notna(value): + present.append((label, float(value))) if not present: return "no material factor loadings were estimated" @@ -610,6 +625,8 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None: "strategy", "market", "model", + "factor_source", + "proxy_only", "alpha_ann", "r_squared", "residual_vol_ann", @@ -618,16 +635,25 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None: "beta_hml", "beta_rmw", "beta_cma", - "beta_smb_proxy", - "beta_hml_proxy", - "beta_rmw_proxy", - "beta_cma_proxy", "beta_mom", "beta_lowvol", "beta_recovery", ] table = summary_df.reindex(columns=display_columns).copy() - numeric_columns = [column for column in display_columns if column not in {"strategy", "market", "model"}] + if bool(table["proxy_only"].fillna(False).all()): + table = table.rename( + columns={ + "beta_smb": "beta_smb_proxy", + "beta_hml": "beta_hml_proxy", + "beta_rmw": "beta_rmw_proxy", + "beta_cma": "beta_cma_proxy", + } + ) + numeric_columns = [ + column + for column in table.columns + if column not in {"strategy", "market", "model", "factor_source", "proxy_only"} + ] table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4) print("\nFactor attribution") diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py index b1eeb4d..d172ed7 100644 --- a/tests/test_factor_attribution.py +++ b/tests/test_factor_attribution.py @@ -655,10 +655,6 @@ class AttributionIntegrationTests(unittest.TestCase): "beta_hml", "beta_rmw", "beta_cma", - "beta_smb_proxy", - "beta_hml_proxy", - "beta_rmw_proxy", - "beta_cma_proxy", "beta_mom", "beta_lowvol", "beta_recovery", @@ -671,7 +667,6 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3) self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3) self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3) - self.assertTrue(np.isnan(summary.loc[0, "beta_smb_proxy"])) self.assertTrue(np.isnan(summary.loc[0, "beta_mom"])) self.assertListEqual( @@ -709,14 +704,14 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertEqual(summary.loc[0, "model"], "proxy") self.assertEqual(summary.loc[0, "factor_source"], "proxy_only") self.assertTrue(bool(summary.loc[0, "proxy_only"])) - self.assertIn("beta_smb_proxy", summary.columns) - self.assertIn("beta_hml_proxy", summary.columns) - self.assertIn("beta_rmw_proxy", summary.columns) - self.assertIn("beta_cma_proxy", summary.columns) - self.assertTrue(np.isnan(summary.loc[0, "beta_smb"])) - self.assertTrue(np.isnan(summary.loc[0, "beta_hml"])) - self.assertTrue(np.isnan(summary.loc[0, "beta_rmw"])) - self.assertTrue(np.isnan(summary.loc[0, "beta_cma"])) + self.assertNotIn("beta_smb_proxy", summary.columns) + self.assertNotIn("beta_hml_proxy", summary.columns) + self.assertNotIn("beta_rmw_proxy", summary.columns) + self.assertNotIn("beta_cma_proxy", summary.columns) + self.assertFalse(np.isnan(summary.loc[0, "beta_smb"])) + self.assertFalse(np.isnan(summary.loc[0, "beta_hml"])) + self.assertFalse(np.isnan(summary.loc[0, "beta_rmw"])) + self.assertFalse(np.isnan(summary.loc[0, "beta_cma"])) self.assertEqual( set(loadings["factor"]), {"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"}, @@ -818,14 +813,10 @@ class AttributionIntegrationTests(unittest.TestCase): "adj_r_squared": 0.70, "residual_vol_ann": 0.14, "beta_mkt": 0.85, - "beta_smb": np.nan, - "beta_hml": np.nan, - "beta_rmw": np.nan, - "beta_cma": np.nan, - "beta_smb_proxy": -0.30, - "beta_hml_proxy": 0.25, - "beta_rmw_proxy": 0.10, - "beta_cma_proxy": -0.05, + "beta_smb": -0.30, + "beta_hml": 0.25, + "beta_rmw": 0.10, + "beta_cma": -0.05, "beta_mom": 0.20, "beta_lowvol": np.nan, "beta_recovery": np.nan, @@ -841,6 +832,7 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertIn("beta_smb_proxy", output) self.assertIn("beta_hml_proxy", output) self.assertIn("SMB_PROXY", output) + self.assertNotIn(" beta_smb ", output) def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame: steps = np.arange(len(dates), dtype=float)