diff --git a/factor_attribution.py b/factor_attribution.py
index 47979e8..f042302 100644
--- a/factor_attribution.py
+++ b/factor_attribution.py
@@ -475,7 +475,7 @@ def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]:
         parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
         if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all(
             value.strip() for value in parsed_mapping.values()
-        ):
+        ) and _semantics_have_unique_headers(parsed_mapping):
             return parsed_mapping
 
     return canonical
@@ -488,6 +488,11 @@ def _beta_header_name(factor_name: str) -> str:
     return f"beta_{suffix}"
 
 
+def _semantics_have_unique_headers(semantics: dict[str, str]) -> bool:
+    headers = [_beta_header_name(semantics[column]) for column in SEMANTIC_BETA_COLUMNS]
+    return len(headers) == len(set(headers))
+
+
 def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]:
     header_map: dict[str, str] = {}
     for beta_column, factor_name in semantics.items():
diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py
index 1a52991..f418e7d 100644
--- a/tests/test_factor_attribution.py
+++ b/tests/test_factor_attribution.py
@@ -1017,6 +1017,112 @@ class AttributionIntegrationTests(unittest.TestCase):
         self.assertNotIn("MKT_RF 1.05", output)
         self.assertNotIn("SMB_PROXY -0.30", output)
 
+    def test_print_attribution_summary_ignores_duplicate_beta_semantics_labels(self):
+        summary = pd.DataFrame(
+            [
+                {
+                    "strategy": "US Strategy",
+                    "market": "us",
+                    "model": "ff5",
+                    "factor_source": "external+local",
+                    "proxy_only": False,
+                    "beta_semantics": json.dumps(
+                        {
+                            "beta_mkt": "DUPLICATE",
+                            "beta_smb": "DUPLICATE",
+                            "beta_hml": "VALUE",
+                            "beta_rmw": "QUALITY",
+                            "beta_cma": "INVESTMENT",
+                            "beta_mom": "MOMENTUM",
+                            "beta_lowvol": "MINVOL",
+                            "beta_recovery": "BOUNCE",
+                        }
+                    ),
+                    "start_date": "2025-01-02",
+                    "end_date": "2026-03-24",
+                    "n_obs": 319,
+                    "alpha_daily": 0.0004,
+                    "alpha_ann": 0.1008,
+                    "alpha_t_stat": 2.1,
+                    "alpha_p_value": 0.04,
+                    "r_squared": 0.82,
+                    "adj_r_squared": 0.81,
+                    "residual_vol_ann": 0.12,
+                    "beta_mkt": 1.05,
+                    "beta_smb": -0.20,
+                    "beta_hml": 0.30,
+                    "beta_rmw": 0.05,
+                    "beta_cma": np.nan,
+                    "beta_mom": np.nan,
+                    "beta_lowvol": np.nan,
+                    "beta_recovery": np.nan,
+                }
+            ]
+        )
+
+        buffer = io.StringIO()
+        with contextlib.redirect_stdout(buffer):
+            print_attribution_summary(summary)
+
+        output = buffer.getvalue()
+        self.assertIn("beta_smb", output)
+        self.assertIn("MKT_RF 1.05", output)
+        self.assertNotIn("beta_duplicate", output)
+        self.assertNotIn("DUPLICATE 1.05", output)
+
+    def test_print_attribution_summary_ignores_colliding_normalized_beta_semantics_labels(self):
+        summary = pd.DataFrame(
+            [
+                {
+                    "strategy": "CN Strategy",
+                    "market": "cn",
+                    "model": "proxy",
+                    "factor_source": "proxy_only",
+                    "proxy_only": True,
+                    "beta_semantics": json.dumps(
+                        {
+                            "beta_mkt": "A-B",
+                            "beta_smb": "A B",
+                            "beta_hml": "VALUE_PROXY_CUSTOM",
+                            "beta_rmw": "QUALITY_PROXY_CUSTOM",
+                            "beta_cma": "INVEST_PROXY_CUSTOM",
+                            "beta_mom": "TREND",
+                            "beta_lowvol": "DEFENSIVE",
+                            "beta_recovery": "RECOVERY_PROXY",
+                        }
+                    ),
+                    "start_date": "2025-01-02",
+                    "end_date": "2026-03-24",
+                    "n_obs": 319,
+                    "alpha_daily": 0.0002,
+                    "alpha_ann": 0.0504,
+                    "alpha_t_stat": 1.5,
+                    "alpha_p_value": 0.12,
+                    "r_squared": 0.72,
+                    "adj_r_squared": 0.70,
+                    "residual_vol_ann": 0.14,
+                    "beta_mkt": 0.85,
+                    "beta_smb": -0.30,
+                    "beta_hml": 0.25,
+                    "beta_rmw": 0.10,
+                    "beta_cma": -0.05,
+                    "beta_mom": 0.20,
+                    "beta_lowvol": np.nan,
+                    "beta_recovery": np.nan,
+                }
+            ]
+        )
+
+        buffer = io.StringIO()
+        with contextlib.redirect_stdout(buffer):
+            print_attribution_summary(summary)
+
+        output = buffer.getvalue()
+        self.assertIn("beta_smb_proxy", output)
+        self.assertIn("SMB_PROXY -0.30", output)
+        self.assertNotIn("beta_a_b", output)
+        self.assertNotIn("A B -0.30", output)
+
     def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
         summary = pd.DataFrame(
             [