Honor complete attribution beta semantics labels

2026-04-07 17:51:57 +08:00
parent b3d87b3d92
commit 35a91ba6cc
2 changed files with 124 additions and 26 deletions
--- a/factor_attribution.py
+++ b/factor_attribution.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import json
 import http.client
 import io
 import re
 import socket
 import ssl
 import warnings
@@ -472,25 +473,33 @@ def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]:
        else:
            if isinstance(parsed, dict):
                parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
-                if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and parsed_mapping == canonical:
+                if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all(
                    value.strip() for value in parsed_mapping.values()
                ):
                    return parsed_mapping
    return canonical
-def _section_beta_header_map(summary_df: pd.DataFrame) -> dict[str, str]:
+def _beta_header_name(factor_name: str) -> str:
-    if summary_df.empty:
+    suffix = factor_name.strip().lower()
-        return {}
+    suffix = re.sub(r"[^a-z0-9]+", "_", suffix).strip("_")
    semantics = _resolve_beta_semantics(summary_df.iloc[0])
    header_map: dict[str, str] = {}
    for beta_column, factor_name in semantics.items():
        suffix = factor_name.lower()
    if suffix == "mkt_rf":
        suffix = "mkt"
-        header_map[beta_column] = f"beta_{suffix}"
+    return f"beta_{suffix}"
 def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]:
    """Map each beta column to the header text used for it in a section.

    Every label in ``semantics`` is passed through ``_beta_header_name``;
    the keys are kept unchanged, in the order ``semantics`` yields them.
    """
    return {
        column: _beta_header_name(label)
        for column, label in semantics.items()
    }
 def _section_key(row: pd.Series) -> tuple[bool, tuple[tuple[str, str], ...]]:
    """Build the hashable grouping key for one summary row.

    Returns ``(is_proxy, semantics_items)``: whether the row is flagged
    ``proxy_only``, and the row's resolved beta semantics as
    ``(column, label)`` pairs in ``SEMANTIC_BETA_COLUMNS`` order.

    A missing or null ``proxy_only`` value (``None``, ``NaN``, ``pd.NA``) is
    treated as ``False``. A bare ``bool()`` would report ``NaN`` as ``True``
    and raise on ``pd.NA``, which disagrees with the
    ``fillna(False).astype(bool)`` handling that ``print_attribution_summary``
    applies to the same column.
    """
    semantics = _resolve_beta_semantics(row)
    proxy_flag = row.get("proxy_only", False)
    # Null flags mean "not proxy"; only real values go through bool().
    is_proxy = False if pd.isna(proxy_flag) else bool(proxy_flag)
    return is_proxy, tuple((key, semantics[key]) for key in SEMANTIC_BETA_COLUMNS)
 def attribute_strategies(
    results_df: pd.DataFrame,
    benchmark_label: str,
@@ -651,7 +660,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
    return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings)
-def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None:
+def _print_attribution_section(summary_df: pd.DataFrame, title: str, semantics: dict[str, str]) -> None:
    display_columns = [
        "strategy",
        "market",
@@ -671,8 +680,7 @@ def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_label
        "beta_recovery",
    ]
    table = summary_df.reindex(columns=display_columns).copy()
-    del proxy_labels
+    table = table.rename(columns=_section_beta_header_map(semantics))
    table = table.rename(columns=_section_beta_header_map(summary_df))
    numeric_columns = [
        column
        for column in table.columns
@@ -689,22 +697,18 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
        print("Factor attribution: no usable regressions were produced.")
        return
    proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool)
    standard_rows = summary_df.loc[~proxy_mask]
    proxy_rows = summary_df.loc[proxy_mask]
    print("\nFactor attribution")
-    if not standard_rows.empty:
+    sections: dict[tuple[bool, tuple[tuple[str, str], ...]], list[int]] = {}
    for index, row in summary_df.iterrows():
        sections.setdefault(_section_key(row), []).append(index)
    for (is_proxy, semantics_items), row_indexes in sections.items():
        section_rows = summary_df.loc[row_indexes]
        title = "Proxy factor attribution" if is_proxy else "Standard factor attribution"
        _print_attribution_section(
-            standard_rows,
+            section_rows,
-            title="Standard factor attribution",
+            title=title,
-            proxy_labels=False,
+            semantics=dict(semantics_items),
        )
    if not proxy_rows.empty:
        _print_attribution_section(
            proxy_rows,
            title="Proxy factor attribution",
            proxy_labels=True,
        )
    print("\nInterpretation")
    for _, row in summary_df.iterrows():
--- a/tests/test_factor_attribution.py
+++ b/tests/test_factor_attribution.py
@@ -923,6 +923,100 @@ class AttributionIntegrationTests(unittest.TestCase):
        self.assertIn("SMB_PROXY", output)
        self.assertNotIn(" beta_smb ", output)
    def test_print_attribution_summary_honors_complete_noncanonical_beta_semantics(self):
        """Printed summary uses complete custom beta labels, not canonical ones."""
        # Each row carries a full eight-key mapping whose labels differ from
        # the canonical names the negative assertions below look for.
        standard_labels = {
            "beta_mkt": "MARKET_EXCESS",
            "beta_smb": "SIZE",
            "beta_hml": "VALUE",
            "beta_rmw": "QUALITY",
            "beta_cma": "INVESTMENT",
            "beta_mom": "MOMENTUM",
            "beta_lowvol": "MINVOL",
            "beta_recovery": "BOUNCE",
        }
        proxy_labels = {
            "beta_mkt": "LOCAL_MARKET",
            "beta_smb": "SIZE_PROXY_CUSTOM",
            "beta_hml": "VALUE_PROXY_CUSTOM",
            "beta_rmw": "QUALITY_PROXY_CUSTOM",
            "beta_cma": "INVEST_PROXY_CUSTOM",
            "beta_mom": "TREND",
            "beta_lowvol": "DEFENSIVE",
            "beta_recovery": "RECOVERY_PROXY",
        }
        # Both rows share the same sample window and observation count.
        window = {
            "start_date": "2025-01-02",
            "end_date": "2026-03-24",
            "n_obs": 319,
        }
        standard_row = {
            "strategy": "US Strategy",
            "market": "us",
            "model": "ff5",
            "factor_source": "external+local",
            "proxy_only": False,
            "beta_semantics": json.dumps(standard_labels),
            **window,
            "alpha_daily": 0.0004,
            "alpha_ann": 0.1008,
            "alpha_t_stat": 2.1,
            "alpha_p_value": 0.04,
            "r_squared": 0.82,
            "adj_r_squared": 0.81,
            "residual_vol_ann": 0.12,
            "beta_mkt": 1.05,
            "beta_smb": -0.20,
            "beta_hml": 0.30,
            "beta_rmw": 0.05,
            "beta_cma": np.nan,
            "beta_mom": np.nan,
            "beta_lowvol": np.nan,
            "beta_recovery": np.nan,
        }
        proxy_row = {
            "strategy": "CN Strategy",
            "market": "cn",
            "model": "proxy",
            "factor_source": "proxy_only",
            "proxy_only": True,
            "beta_semantics": json.dumps(proxy_labels),
            **window,
            "alpha_daily": 0.0002,
            "alpha_ann": 0.0504,
            "alpha_t_stat": 1.5,
            "alpha_p_value": 0.12,
            "r_squared": 0.72,
            "adj_r_squared": 0.70,
            "residual_vol_ann": 0.14,
            "beta_mkt": 0.85,
            "beta_smb": -0.30,
            "beta_hml": 0.25,
            "beta_rmw": 0.10,
            "beta_cma": -0.05,
            "beta_mom": 0.20,
            "beta_lowvol": np.nan,
            "beta_recovery": np.nan,
        }
        summary = pd.DataFrame([standard_row, proxy_row])

        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            print_attribution_summary(summary)
        printed = captured.getvalue()

        # Section titles, label-derived names and loadings text must appear.
        for expected in (
            "Standard factor attribution",
            "Proxy factor attribution",
            "beta_market_excess",
            "beta_size_proxy_custom",
            "MARKET_EXCESS 1.05",
            "SIZE_PROXY_CUSTOM -0.30",
        ):
            self.assertIn(expected, printed)
        # Canonical labels must not appear for these loadings.
        for unexpected in ("MKT_RF 1.05", "SMB_PROXY -0.30"):
            self.assertNotIn(unexpected, printed)
    def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
        summary = pd.DataFrame(
            [