Restore summary schema for proxy attribution

This commit is contained in:
2026-04-07 17:25:10 +08:00
parent 69a03f52d9
commit 82a3e63c2b
2 changed files with 58 additions and 40 deletions

View File

@@ -37,13 +37,13 @@ SUMMARY_BETA_COLUMN_BY_FACTOR = {
"MKT_RF": "beta_mkt", "MKT_RF": "beta_mkt",
"MKT": "beta_mkt", "MKT": "beta_mkt",
"SMB": "beta_smb", "SMB": "beta_smb",
"SMB_PROXY": "beta_smb_proxy", "SMB_PROXY": "beta_smb",
"HML": "beta_hml", "HML": "beta_hml",
"HML_PROXY": "beta_hml_proxy", "HML_PROXY": "beta_hml",
"RMW": "beta_rmw", "RMW": "beta_rmw",
"RMW_PROXY": "beta_rmw_proxy", "RMW_PROXY": "beta_rmw",
"CMA": "beta_cma", "CMA": "beta_cma",
"CMA_PROXY": "beta_cma_proxy", "CMA_PROXY": "beta_cma",
"MOM": "beta_mom", "MOM": "beta_mom",
"LOWVOL": "beta_lowvol", "LOWVOL": "beta_lowvol",
"RECOVERY": "beta_recovery", "RECOVERY": "beta_recovery",
@@ -69,10 +69,6 @@ SUMMARY_COLUMNS = [
"beta_hml", "beta_hml",
"beta_rmw", "beta_rmw",
"beta_cma", "beta_cma",
"beta_smb_proxy",
"beta_hml_proxy",
"beta_rmw_proxy",
"beta_cma_proxy",
"beta_mom", "beta_mom",
"beta_lowvol", "beta_lowvol",
"beta_recovery", "beta_recovery",
@@ -526,10 +522,6 @@ def attribute_strategies(
"beta_hml": np.nan, "beta_hml": np.nan,
"beta_rmw": np.nan, "beta_rmw": np.nan,
"beta_cma": np.nan, "beta_cma": np.nan,
"beta_smb_proxy": np.nan,
"beta_hml_proxy": np.nan,
"beta_rmw_proxy": np.nan,
"beta_cma_proxy": np.nan,
"beta_mom": np.nan, "beta_mom": np.nan,
"beta_lowvol": np.nan, "beta_lowvol": np.nan,
"beta_recovery": np.nan, "beta_recovery": np.nan,
@@ -588,11 +580,34 @@ def _describe_fit(r_squared: float) -> str:
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str: def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
beta_columns = [column for column in row.index if column.startswith("beta_")] beta_columns = [column for column in row.index if column.startswith("beta_")]
if bool(row.get("proxy_only", False)):
factor_labels = {
"beta_mkt": "MKT",
"beta_smb": "SMB_PROXY",
"beta_hml": "HML_PROXY",
"beta_rmw": "RMW_PROXY",
"beta_cma": "CMA_PROXY",
"beta_mom": "MOM",
"beta_lowvol": "LOWVOL",
"beta_recovery": "RECOVERY",
}
else:
factor_labels = {
"beta_mkt": "MKT",
"beta_smb": "SMB",
"beta_hml": "HML",
"beta_rmw": "RMW",
"beta_cma": "CMA",
"beta_mom": "MOM",
"beta_lowvol": "LOWVOL",
"beta_recovery": "RECOVERY",
}
present = [] present = []
for column in beta_columns: for column in beta_columns:
value = row.get(column) value = row.get(column)
if pd.notna(value): label = factor_labels.get(column)
present.append((column.removeprefix("beta_").upper(), float(value))) if label is not None and pd.notna(value):
present.append((label, float(value)))
if not present: if not present:
return "no material factor loadings were estimated" return "no material factor loadings were estimated"
@@ -610,6 +625,8 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
"strategy", "strategy",
"market", "market",
"model", "model",
"factor_source",
"proxy_only",
"alpha_ann", "alpha_ann",
"r_squared", "r_squared",
"residual_vol_ann", "residual_vol_ann",
@@ -618,16 +635,25 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
"beta_hml", "beta_hml",
"beta_rmw", "beta_rmw",
"beta_cma", "beta_cma",
"beta_smb_proxy",
"beta_hml_proxy",
"beta_rmw_proxy",
"beta_cma_proxy",
"beta_mom", "beta_mom",
"beta_lowvol", "beta_lowvol",
"beta_recovery", "beta_recovery",
] ]
table = summary_df.reindex(columns=display_columns).copy() table = summary_df.reindex(columns=display_columns).copy()
numeric_columns = [column for column in display_columns if column not in {"strategy", "market", "model"}] if bool(table["proxy_only"].fillna(False).all()):
table = table.rename(
columns={
"beta_smb": "beta_smb_proxy",
"beta_hml": "beta_hml_proxy",
"beta_rmw": "beta_rmw_proxy",
"beta_cma": "beta_cma_proxy",
}
)
numeric_columns = [
column
for column in table.columns
if column not in {"strategy", "market", "model", "factor_source", "proxy_only"}
]
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4) table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
print("\nFactor attribution") print("\nFactor attribution")

View File

@@ -655,10 +655,6 @@ class AttributionIntegrationTests(unittest.TestCase):
"beta_hml", "beta_hml",
"beta_rmw", "beta_rmw",
"beta_cma", "beta_cma",
"beta_smb_proxy",
"beta_hml_proxy",
"beta_rmw_proxy",
"beta_cma_proxy",
"beta_mom", "beta_mom",
"beta_lowvol", "beta_lowvol",
"beta_recovery", "beta_recovery",
@@ -671,7 +667,6 @@ class AttributionIntegrationTests(unittest.TestCase):
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3) self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3) self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3) self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
self.assertTrue(np.isnan(summary.loc[0, "beta_smb_proxy"]))
self.assertTrue(np.isnan(summary.loc[0, "beta_mom"])) self.assertTrue(np.isnan(summary.loc[0, "beta_mom"]))
self.assertListEqual( self.assertListEqual(
@@ -709,14 +704,14 @@ class AttributionIntegrationTests(unittest.TestCase):
self.assertEqual(summary.loc[0, "model"], "proxy") self.assertEqual(summary.loc[0, "model"], "proxy")
self.assertEqual(summary.loc[0, "factor_source"], "proxy_only") self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
self.assertTrue(bool(summary.loc[0, "proxy_only"])) self.assertTrue(bool(summary.loc[0, "proxy_only"]))
self.assertIn("beta_smb_proxy", summary.columns) self.assertNotIn("beta_smb_proxy", summary.columns)
self.assertIn("beta_hml_proxy", summary.columns) self.assertNotIn("beta_hml_proxy", summary.columns)
self.assertIn("beta_rmw_proxy", summary.columns) self.assertNotIn("beta_rmw_proxy", summary.columns)
self.assertIn("beta_cma_proxy", summary.columns) self.assertNotIn("beta_cma_proxy", summary.columns)
self.assertTrue(np.isnan(summary.loc[0, "beta_smb"])) self.assertFalse(np.isnan(summary.loc[0, "beta_smb"]))
self.assertTrue(np.isnan(summary.loc[0, "beta_hml"])) self.assertFalse(np.isnan(summary.loc[0, "beta_hml"]))
self.assertTrue(np.isnan(summary.loc[0, "beta_rmw"])) self.assertFalse(np.isnan(summary.loc[0, "beta_rmw"]))
self.assertTrue(np.isnan(summary.loc[0, "beta_cma"])) self.assertFalse(np.isnan(summary.loc[0, "beta_cma"]))
self.assertEqual( self.assertEqual(
set(loadings["factor"]), set(loadings["factor"]),
{"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"}, {"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
@@ -818,14 +813,10 @@ class AttributionIntegrationTests(unittest.TestCase):
"adj_r_squared": 0.70, "adj_r_squared": 0.70,
"residual_vol_ann": 0.14, "residual_vol_ann": 0.14,
"beta_mkt": 0.85, "beta_mkt": 0.85,
"beta_smb": np.nan, "beta_smb": -0.30,
"beta_hml": np.nan, "beta_hml": 0.25,
"beta_rmw": np.nan, "beta_rmw": 0.10,
"beta_cma": np.nan, "beta_cma": -0.05,
"beta_smb_proxy": -0.30,
"beta_hml_proxy": 0.25,
"beta_rmw_proxy": 0.10,
"beta_cma_proxy": -0.05,
"beta_mom": 0.20, "beta_mom": 0.20,
"beta_lowvol": np.nan, "beta_lowvol": np.nan,
"beta_recovery": np.nan, "beta_recovery": np.nan,
@@ -841,6 +832,7 @@ class AttributionIntegrationTests(unittest.TestCase):
self.assertIn("beta_smb_proxy", output) self.assertIn("beta_smb_proxy", output)
self.assertIn("beta_hml_proxy", output) self.assertIn("beta_hml_proxy", output)
self.assertIn("SMB_PROXY", output) self.assertIn("SMB_PROXY", output)
self.assertNotIn(" beta_smb ", output)
def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame: def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
steps = np.arange(len(dates), dtype=float) steps = np.arange(len(dates), dtype=float)