Reject empty attribution semantics headers

This commit is contained in:
2026-04-07 18:05:44 +08:00
parent 88d765713e
commit a66b039d2d
2 changed files with 109 additions and 0 deletions

View File

@@ -490,6 +490,8 @@ def _beta_header_name(factor_name: str) -> str:
def _semantics_have_unique_headers(semantics: dict[str, str]) -> bool:
    """Report whether the semantic labels yield usable, distinct headers.

    Every column listed in ``SEMANTIC_BETA_COLUMNS`` is looked up in
    *semantics* and converted with ``_beta_header_name``.

    Args:
        semantics: Mapping from beta column name to its semantic label. It
            must contain every column in ``SEMANTIC_BETA_COLUMNS``; a missing
            column raises ``KeyError``.

    Returns:
        ``False`` if any header is exactly ``"beta_"`` or if two columns
        share a header; ``True`` otherwise.
    """
    # Build the full list first so every column is looked up and converted
    # before any check runs.
    header_names = []
    for column in SEMANTIC_BETA_COLUMNS:
        header_names.append(_beta_header_name(semantics[column]))

    # A bare "beta_" has nothing after the prefix (presumably a label that
    # normalized to an empty string), so it cannot identify a column.
    if "beta_" in header_names:
        return False

    # Duplicates collapse inside a set, shrinking it below the list length.
    return len(set(header_names)) == len(header_names)

View File

@@ -1123,6 +1123,113 @@ class AttributionIntegrationTests(unittest.TestCase):
self.assertNotIn("beta_a_b", output)
self.assertNotIn("A B -0.30", output)
def test_print_attribution_summary_ignores_punctuation_only_beta_semantics_labels(self):
    """Custom semantics are not printed when one label is only punctuation.

    The ``beta_mkt`` label is ``"!!!"``. The printed summary is expected to
    show ``beta_smb_proxy`` / ``SMB_PROXY`` / ``MKT`` names and none of the
    custom labels supplied in ``beta_semantics``.
    """
    # Serialized label mapping; only "beta_mkt" carries the degenerate label.
    semantics_json = json.dumps(
        {
            "beta_mkt": "!!!",
            "beta_smb": "SIZE_PROXY_CUSTOM",
            "beta_hml": "VALUE_PROXY_CUSTOM",
            "beta_rmw": "QUALITY_PROXY_CUSTOM",
            "beta_cma": "INVEST_PROXY_CUSTOM",
            "beta_mom": "TREND",
            "beta_lowvol": "DEFENSIVE",
            "beta_recovery": "RECOVERY_PROXY",
        }
    )
    row = {
        "strategy": "CN Strategy",
        "market": "cn",
        "model": "proxy",
        "factor_source": "proxy_only",
        "proxy_only": True,
        "beta_semantics": semantics_json,
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0002,
        "alpha_ann": 0.0504,
        "alpha_t_stat": 1.5,
        "alpha_p_value": 0.12,
        "r_squared": 0.72,
        "adj_r_squared": 0.70,
        "residual_vol_ann": 0.14,
        "beta_mkt": 0.85,
        "beta_smb": -0.30,
        "beta_hml": 0.25,
        "beta_rmw": 0.10,
        "beta_cma": -0.05,
        "beta_mom": 0.20,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    summary = pd.DataFrame([row])

    # Capture everything the function prints so it can be inspected as text.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(summary)
    output = captured.getvalue()

    expected_fragments = ("beta_smb_proxy", "SMB_PROXY -0.30", "MKT 0.85")
    for fragment in expected_fragments:
        self.assertIn(fragment, output)

    # Neither the custom size label nor the punctuation label may leak out.
    rejected_fragments = ("beta_size_proxy_custom", "!!! 0.85")
    for fragment in rejected_fragments:
        self.assertNotIn(fragment, output)
def test_print_attribution_summary_ignores_labels_that_normalize_to_empty_suffix(self):
    """Custom semantics are not printed when one label is only underscores.

    The ``beta_mkt`` label is ``"___"``. The printed summary is expected to
    show ``beta_smb`` / ``MKT_RF`` names and none of the custom labels
    supplied in ``beta_semantics``.
    """
    # Serialized label mapping; only "beta_mkt" carries the degenerate label.
    semantics_json = json.dumps(
        {
            "beta_mkt": "___",
            "beta_smb": "SIZE",
            "beta_hml": "VALUE",
            "beta_rmw": "QUALITY",
            "beta_cma": "INVESTMENT",
            "beta_mom": "MOMENTUM",
            "beta_lowvol": "MINVOL",
            "beta_recovery": "BOUNCE",
        }
    )
    row = {
        "strategy": "US Strategy",
        "market": "us",
        "model": "ff5",
        "factor_source": "external+local",
        "proxy_only": False,
        "beta_semantics": semantics_json,
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0004,
        "alpha_ann": 0.1008,
        "alpha_t_stat": 2.1,
        "alpha_p_value": 0.04,
        "r_squared": 0.82,
        "adj_r_squared": 0.81,
        "residual_vol_ann": 0.12,
        "beta_mkt": 1.05,
        "beta_smb": -0.20,
        "beta_hml": 0.30,
        "beta_rmw": 0.05,
        "beta_cma": np.nan,
        "beta_mom": np.nan,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    summary = pd.DataFrame([row])

    # Capture everything the function prints so it can be inspected as text.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(summary)
    output = captured.getvalue()

    expected_fragments = ("beta_smb", "MKT_RF 1.05")
    for fragment in expected_fragments:
        self.assertIn(fragment, output)

    # Neither the custom size label nor the underscore label may leak out.
    rejected_fragments = ("beta_size", "___ 1.05")
    for fragment in rejected_fragments:
        self.assertNotIn(fragment, output)
def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
summary = pd.DataFrame(
[