From a66b039d2d755edb4040f2310fc92780e9c7fa99 Mon Sep 17 00:00:00 2001
From: Gahow Wang
Date: Tue, 7 Apr 2026 18:05:44 +0800
Subject: [PATCH] Reject empty attribution semantics headers

---
Notes:
  _semantics_have_unique_headers() now returns False as soon as any header
  produced by _beta_header_name() is exactly "beta_", before the existing
  duplicate check runs. The two new tests pass print_attribution_summary()
  a beta_semantics mapping whose "beta_mkt" label is "!!!" or "___".

 factor_attribution.py            |   2 +
 tests/test_factor_attribution.py | 107 +++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)

diff --git a/factor_attribution.py b/factor_attribution.py
index f042302..5e73c79 100644
--- a/factor_attribution.py
+++ b/factor_attribution.py
@@ -490,6 +490,8 @@ def _beta_header_name(factor_name: str) -> str:
 
 def _semantics_have_unique_headers(semantics: dict[str, str]) -> bool:
     headers = [_beta_header_name(semantics[column]) for column in SEMANTIC_BETA_COLUMNS]
+    if any(header == "beta_" for header in headers):
+        return False
     return len(headers) == len(set(headers))
 
 
diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py
index f418e7d..06ddbac 100644
--- a/tests/test_factor_attribution.py
+++ b/tests/test_factor_attribution.py
@@ -1123,6 +1123,113 @@ class AttributionIntegrationTests(unittest.TestCase):
         self.assertNotIn("beta_a_b", output)
         self.assertNotIn("A B -0.30", output)
 
+    def test_print_attribution_summary_ignores_punctuation_only_beta_semantics_labels(self):
+        summary = pd.DataFrame(
+            [
+                {
+                    "strategy": "CN Strategy",
+                    "market": "cn",
+                    "model": "proxy",
+                    "factor_source": "proxy_only",
+                    "proxy_only": True,
+                    "beta_semantics": json.dumps(
+                        {
+                            "beta_mkt": "!!!",
+                            "beta_smb": "SIZE_PROXY_CUSTOM",
+                            "beta_hml": "VALUE_PROXY_CUSTOM",
+                            "beta_rmw": "QUALITY_PROXY_CUSTOM",
+                            "beta_cma": "INVEST_PROXY_CUSTOM",
+                            "beta_mom": "TREND",
+                            "beta_lowvol": "DEFENSIVE",
+                            "beta_recovery": "RECOVERY_PROXY",
+                        }
+                    ),
+                    "start_date": "2025-01-02",
+                    "end_date": "2026-03-24",
+                    "n_obs": 319,
+                    "alpha_daily": 0.0002,
+                    "alpha_ann": 0.0504,
+                    "alpha_t_stat": 1.5,
+                    "alpha_p_value": 0.12,
+                    "r_squared": 0.72,
+                    "adj_r_squared": 0.70,
+                    "residual_vol_ann": 0.14,
+                    "beta_mkt": 0.85,
+                    "beta_smb": -0.30,
+                    "beta_hml": 0.25,
+                    "beta_rmw": 0.10,
+                    "beta_cma": -0.05,
+                    "beta_mom": 0.20,
+                    "beta_lowvol": np.nan,
+                    "beta_recovery": np.nan,
+                }
+            ]
+        )
+
+        buffer = io.StringIO()
+        with contextlib.redirect_stdout(buffer):
+            print_attribution_summary(summary)
+
+        output = buffer.getvalue()
+        self.assertIn("beta_smb_proxy", output)
+        self.assertIn("SMB_PROXY -0.30", output)
+        self.assertIn("MKT 0.85", output)
+        self.assertNotIn("beta_size_proxy_custom", output)
+        self.assertNotIn("!!! 0.85", output)
+
+    def test_print_attribution_summary_ignores_labels_that_normalize_to_empty_suffix(self):
+        summary = pd.DataFrame(
+            [
+                {
+                    "strategy": "US Strategy",
+                    "market": "us",
+                    "model": "ff5",
+                    "factor_source": "external+local",
+                    "proxy_only": False,
+                    "beta_semantics": json.dumps(
+                        {
+                            "beta_mkt": "___",
+                            "beta_smb": "SIZE",
+                            "beta_hml": "VALUE",
+                            "beta_rmw": "QUALITY",
+                            "beta_cma": "INVESTMENT",
+                            "beta_mom": "MOMENTUM",
+                            "beta_lowvol": "MINVOL",
+                            "beta_recovery": "BOUNCE",
+                        }
+                    ),
+                    "start_date": "2025-01-02",
+                    "end_date": "2026-03-24",
+                    "n_obs": 319,
+                    "alpha_daily": 0.0004,
+                    "alpha_ann": 0.1008,
+                    "alpha_t_stat": 2.1,
+                    "alpha_p_value": 0.04,
+                    "r_squared": 0.82,
+                    "adj_r_squared": 0.81,
+                    "residual_vol_ann": 0.12,
+                    "beta_mkt": 1.05,
+                    "beta_smb": -0.20,
+                    "beta_hml": 0.30,
+                    "beta_rmw": 0.05,
+                    "beta_cma": np.nan,
+                    "beta_mom": np.nan,
+                    "beta_lowvol": np.nan,
+                    "beta_recovery": np.nan,
+                }
+            ]
+        )
+
+        buffer = io.StringIO()
+        with contextlib.redirect_stdout(buffer):
+            print_attribution_summary(summary)
+
+        output = buffer.getvalue()
+        self.assertIn("beta_smb", output)
+        self.assertIn("MKT_RF 1.05", output)
+        self.assertNotIn("beta_size", output)
+        self.assertNotIn("___ 1.05", output)
+
     def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
         summary = pd.DataFrame(
             [