Reject colliding attribution semantics headers
This commit is contained in:
@@ -475,7 +475,7 @@ def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]:
|
|||||||
parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
|
parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
|
||||||
if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all(
|
if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all(
|
||||||
value.strip() for value in parsed_mapping.values()
|
value.strip() for value in parsed_mapping.values()
|
||||||
):
|
) and _semantics_have_unique_headers(parsed_mapping):
|
||||||
return parsed_mapping
|
return parsed_mapping
|
||||||
return canonical
|
return canonical
|
||||||
|
|
||||||
@@ -488,6 +488,11 @@ def _beta_header_name(factor_name: str) -> str:
|
|||||||
return f"beta_{suffix}"
|
return f"beta_{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def _semantics_have_unique_headers(semantics: dict[str, str]) -> bool:
    """Return True when no two beta columns resolve to the same header name.

    Each column listed in ``SEMANTIC_BETA_COLUMNS`` is looked up in
    ``semantics`` and passed through ``_beta_header_name``; the mapping is
    considered usable only if every resulting header is distinct.

    Raises:
        KeyError: if ``semantics`` lacks one of ``SEMANTIC_BETA_COLUMNS``.
    """
    distinct_headers: set[str] = set()
    column_count = 0
    # Walk every column (no early exit) so a missing key always raises,
    # regardless of whether a collision was already seen.
    for beta_column in SEMANTIC_BETA_COLUMNS:
        distinct_headers.add(_beta_header_name(semantics[beta_column]))
        column_count += 1
    return column_count == len(distinct_headers)
|
||||||
|
|
||||||
|
|
||||||
def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]:
|
def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]:
|
||||||
header_map: dict[str, str] = {}
|
header_map: dict[str, str] = {}
|
||||||
for beta_column, factor_name in semantics.items():
|
for beta_column, factor_name in semantics.items():
|
||||||
|
|||||||
@@ -1017,6 +1017,112 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertNotIn("MKT_RF 1.05", output)
|
self.assertNotIn("MKT_RF 1.05", output)
|
||||||
self.assertNotIn("SMB_PROXY -0.30", output)
|
self.assertNotIn("SMB_PROXY -0.30", output)
|
||||||
|
|
||||||
|
def test_print_attribution_summary_ignores_duplicate_beta_semantics_labels(self):
    """Check the printed summary when two beta columns share one semantic label.

    The ``beta_semantics`` payload maps both ``beta_mkt`` and ``beta_smb`` to
    "DUPLICATE". The printed output must still contain ``beta_smb`` and
    ``MKT_RF 1.05`` and must not contain ``beta_duplicate`` or
    ``DUPLICATE 1.05``.
    """
    # Two entries deliberately carry the identical label "DUPLICATE".
    duplicated_labels = {
        "beta_mkt": "DUPLICATE",
        "beta_smb": "DUPLICATE",
        "beta_hml": "VALUE",
        "beta_rmw": "QUALITY",
        "beta_cma": "INVESTMENT",
        "beta_mom": "MOMENTUM",
        "beta_lowvol": "MINVOL",
        "beta_recovery": "BOUNCE",
    }
    row = {
        "strategy": "US Strategy",
        "market": "us",
        "model": "ff5",
        "factor_source": "external+local",
        "proxy_only": False,
        "beta_semantics": json.dumps(duplicated_labels),
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0004,
        "alpha_ann": 0.1008,
        "alpha_t_stat": 2.1,
        "alpha_p_value": 0.04,
        "r_squared": 0.82,
        "adj_r_squared": 0.81,
        "residual_vol_ann": 0.12,
        "beta_mkt": 1.05,
        "beta_smb": -0.20,
        "beta_hml": 0.30,
        "beta_rmw": 0.05,
        "beta_cma": np.nan,
        "beta_mom": np.nan,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    frame = pd.DataFrame([row])

    # Capture everything the summary printer writes to stdout.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(frame)
    printed = captured.getvalue()

    for expected in ("beta_smb", "MKT_RF 1.05"):
        self.assertIn(expected, printed)
    for rejected in ("beta_duplicate", "DUPLICATE 1.05"):
        self.assertNotIn(rejected, printed)
|
||||||
|
|
||||||
|
def test_print_attribution_summary_ignores_colliding_normalized_beta_semantics_labels(self):
    """Check the printed summary when distinct labels collide after normalization.

    The ``beta_semantics`` payload labels ``beta_mkt`` as "A-B" and
    ``beta_smb`` as "A B". The printed output must contain
    ``beta_smb_proxy`` and ``SMB_PROXY -0.30`` and must not contain
    ``beta_a_b`` or ``A B -0.30``.
    """
    # "A-B" and "A B" differ as raw strings; presumably both normalize to the
    # same header (the assertions below reject "beta_a_b") -- confirm against
    # the header-name helper.
    colliding_labels = {
        "beta_mkt": "A-B",
        "beta_smb": "A B",
        "beta_hml": "VALUE_PROXY_CUSTOM",
        "beta_rmw": "QUALITY_PROXY_CUSTOM",
        "beta_cma": "INVEST_PROXY_CUSTOM",
        "beta_mom": "TREND",
        "beta_lowvol": "DEFENSIVE",
        "beta_recovery": "RECOVERY_PROXY",
    }
    row = {
        "strategy": "CN Strategy",
        "market": "cn",
        "model": "proxy",
        "factor_source": "proxy_only",
        "proxy_only": True,
        "beta_semantics": json.dumps(colliding_labels),
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0002,
        "alpha_ann": 0.0504,
        "alpha_t_stat": 1.5,
        "alpha_p_value": 0.12,
        "r_squared": 0.72,
        "adj_r_squared": 0.70,
        "residual_vol_ann": 0.14,
        "beta_mkt": 0.85,
        "beta_smb": -0.30,
        "beta_hml": 0.25,
        "beta_rmw": 0.10,
        "beta_cma": -0.05,
        "beta_mom": 0.20,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    frame = pd.DataFrame([row])

    # Capture everything the summary printer writes to stdout.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(frame)
    printed = captured.getvalue()

    for expected in ("beta_smb_proxy", "SMB_PROXY -0.30"):
        self.assertIn(expected, printed)
    for rejected in ("beta_a_b", "A B -0.30"):
        self.assertNotIn(rejected, printed)
|
||||||
|
|
||||||
def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
|
def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
|
||||||
summary = pd.DataFrame(
|
summary = pd.DataFrame(
|
||||||
[
|
[
|
||||||
|
|||||||
Reference in New Issue
Block a user