From 35a91ba6cc94088677cfa5c79447b804aa1e6f93 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Tue, 7 Apr 2026 17:51:57 +0800 Subject: [PATCH] Honor complete attribution beta semantics labels --- factor_attribution.py | 56 ++++++++++--------- tests/test_factor_attribution.py | 94 ++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 26 deletions(-) diff --git a/factor_attribution.py b/factor_attribution.py index a09c3b0..47979e8 100644 --- a/factor_attribution.py +++ b/factor_attribution.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import http.client import io +import re import socket import ssl import warnings @@ -472,25 +473,33 @@ def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]: else: if isinstance(parsed, dict): parsed_mapping = {str(key): str(value) for key, value in parsed.items()} - if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and parsed_mapping == canonical: + if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all( + value.strip() for value in parsed_mapping.values() + ): return parsed_mapping return canonical -def _section_beta_header_map(summary_df: pd.DataFrame) -> dict[str, str]: - if summary_df.empty: - return {} +def _beta_header_name(factor_name: str) -> str: + suffix = factor_name.strip().lower() + suffix = re.sub(r"[^a-z0-9]+", "_", suffix).strip("_") + if suffix == "mkt_rf": + suffix = "mkt" + return f"beta_{suffix}" - semantics = _resolve_beta_semantics(summary_df.iloc[0]) + +def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]: header_map: dict[str, str] = {} for beta_column, factor_name in semantics.items(): - suffix = factor_name.lower() - if suffix == "mkt_rf": - suffix = "mkt" - header_map[beta_column] = f"beta_{suffix}" + header_map[beta_column] = _beta_header_name(factor_name) return header_map +def _section_key(row: pd.Series) -> tuple[bool, tuple[tuple[str, str], ...]]: + semantics = _resolve_beta_semantics(row) + return bool(row.get("proxy_only", False)), tuple((key, semantics[key]) for key in SEMANTIC_BETA_COLUMNS) + + def attribute_strategies( results_df: pd.DataFrame, benchmark_label: str, @@ -651,7 +660,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str: return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings) -def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None: +def _print_attribution_section(summary_df: pd.DataFrame, title: str, semantics: dict[str, str]) -> None: display_columns = [ "strategy", "market", @@ -671,8 +680,7 @@ def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_label "beta_recovery", ] table = summary_df.reindex(columns=display_columns).copy() - del proxy_labels - table = table.rename(columns=_section_beta_header_map(summary_df)) + table = table.rename(columns=_section_beta_header_map(semantics)) numeric_columns = [ column for column in table.columns @@ -689,22 +697,18 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None: print("Factor attribution: no usable regressions were produced.") return - proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool) - standard_rows = summary_df.loc[~proxy_mask] - proxy_rows = summary_df.loc[proxy_mask] - print("\nFactor attribution") - if not standard_rows.empty: + sections: dict[tuple[bool, tuple[tuple[str, str], ...]], list[int]] = {} + for index, row in summary_df.iterrows(): + sections.setdefault(_section_key(row), []).append(index) + + for (is_proxy, semantics_items), row_indexes in sections.items(): + section_rows = summary_df.loc[row_indexes] + title = "Proxy factor attribution" if is_proxy else "Standard factor attribution" _print_attribution_section( - standard_rows, - title="Standard factor attribution", - proxy_labels=False, - ) - if not proxy_rows.empty: - _print_attribution_section( - proxy_rows, - title="Proxy factor attribution", - proxy_labels=True, + section_rows, + title=title, + semantics=dict(semantics_items), ) print("\nInterpretation") for _, row in summary_df.iterrows(): diff --git a/tests/test_factor_attribution.py b/tests/test_factor_attribution.py index cb97aef..1a52991 100644 --- a/tests/test_factor_attribution.py +++ b/tests/test_factor_attribution.py @@ -923,6 +923,100 @@ class AttributionIntegrationTests(unittest.TestCase): self.assertIn("SMB_PROXY", output) self.assertNotIn(" beta_smb ", output) + def test_print_attribution_summary_honors_complete_noncanonical_beta_semantics(self): + summary = pd.DataFrame( + [ + { + "strategy": "US Strategy", + "market": "us", + "model": "ff5", + "factor_source": "external+local", + "proxy_only": False, + "beta_semantics": json.dumps( + { + "beta_mkt": "MARKET_EXCESS", + "beta_smb": "SIZE", + "beta_hml": "VALUE", + "beta_rmw": "QUALITY", + "beta_cma": "INVESTMENT", + "beta_mom": "MOMENTUM", + "beta_lowvol": "MINVOL", + "beta_recovery": "BOUNCE", + } + ), + "start_date": "2025-01-02", + "end_date": "2026-03-24", + "n_obs": 319, + "alpha_daily": 0.0004, + "alpha_ann": 0.1008, + "alpha_t_stat": 2.1, + "alpha_p_value": 0.04, + "r_squared": 0.82, + "adj_r_squared": 0.81, + "residual_vol_ann": 0.12, + "beta_mkt": 1.05, + "beta_smb": -0.20, + "beta_hml": 0.30, + "beta_rmw": 0.05, + "beta_cma": np.nan, + "beta_mom": np.nan, + "beta_lowvol": np.nan, + "beta_recovery": np.nan, + }, + { + "strategy": "CN Strategy", + "market": "cn", + "model": "proxy", + "factor_source": "proxy_only", + "proxy_only": True, + "beta_semantics": json.dumps( + { + "beta_mkt": "LOCAL_MARKET", + "beta_smb": "SIZE_PROXY_CUSTOM", + "beta_hml": "VALUE_PROXY_CUSTOM", + "beta_rmw": "QUALITY_PROXY_CUSTOM", + "beta_cma": "INVEST_PROXY_CUSTOM", + "beta_mom": "TREND", + "beta_lowvol": "DEFENSIVE", + "beta_recovery": "RECOVERY_PROXY", + } + ), + "start_date": "2025-01-02", + "end_date": "2026-03-24", + "n_obs": 319, + "alpha_daily": 0.0002, + "alpha_ann": 0.0504, + "alpha_t_stat": 1.5, + "alpha_p_value": 0.12, + "r_squared": 0.72, + "adj_r_squared": 0.70, + "residual_vol_ann": 0.14, + "beta_mkt": 0.85, + "beta_smb": -0.30, + "beta_hml": 0.25, + "beta_rmw": 0.10, + "beta_cma": -0.05, + "beta_mom": 0.20, + "beta_lowvol": np.nan, + "beta_recovery": np.nan, + }, + ] + ) + + buffer = io.StringIO() + with contextlib.redirect_stdout(buffer): + print_attribution_summary(summary) + + output = buffer.getvalue() + self.assertIn("Standard factor attribution", output) + self.assertIn("Proxy factor attribution", output) + self.assertIn("beta_market_excess", output) + self.assertIn("beta_size_proxy_custom", output) + self.assertIn("MARKET_EXCESS 1.05", output) + self.assertIn("SIZE_PROXY_CUSTOM -0.30", output) + self.assertNotIn("MKT_RF 1.05", output) + self.assertNotIn("SMB_PROXY -0.30", output) + def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self): summary = pd.DataFrame( [