Honor complete attribution beta semantics labels

This commit is contained in:
2026-04-07 17:51:57 +08:00
parent b3d87b3d92
commit 35a91ba6cc
2 changed files with 124 additions and 26 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import json import json
import http.client import http.client
import io import io
import re
import socket import socket
import ssl import ssl
import warnings import warnings
@@ -472,25 +473,33 @@ def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]:
else: else:
if isinstance(parsed, dict): if isinstance(parsed, dict):
parsed_mapping = {str(key): str(value) for key, value in parsed.items()} parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and parsed_mapping == canonical: if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and all(
value.strip() for value in parsed_mapping.values()
):
return parsed_mapping return parsed_mapping
return canonical return canonical
def _beta_header_name(factor_name: str, fallback: str | None = None) -> str:
    """Return the table header used for a beta column labelled ``factor_name``.

    The label is stripped and lower-cased, every run of characters outside
    ``[a-z0-9]`` is collapsed to a single underscore, and leading/trailing
    underscores are dropped. The result is prefixed with ``beta_``; the
    sanitised label ``mkt_rf`` is shortened to ``mkt``.

    Args:
        factor_name: Factor label to turn into a header.
        fallback: Header to return when nothing survives sanitising (for
            example a label made only of non-ASCII characters). When ``None``
            the bare ``"beta_"`` prefix is returned.

    Returns:
        The header string, e.g. ``"beta_size_proxy_custom"``.
    """
    suffix = re.sub(r"[^a-z0-9]+", "_", factor_name.strip().lower()).strip("_")
    if not suffix and fallback is not None:
        return fallback
    if suffix == "mkt_rf":
        suffix = "mkt"
    return f"beta_{suffix}"


def _section_beta_header_map(semantics: dict[str, str]) -> dict[str, str]:
    """Map each beta column name to the header derived from its factor label.

    Args:
        semantics: Mapping of beta column name to factor label.

    Returns:
        Mapping of beta column name to display header, suitable for
        ``DataFrame.rename(columns=...)``.
    """
    # A label with no ASCII letters or digits would otherwise become the bare
    # header "beta_"; several such labels would then rename distinct columns to
    # the same name. Keeping the original column name avoids that.
    return {
        beta_column: _beta_header_name(factor_name, fallback=beta_column)
        for beta_column, factor_name in semantics.items()
    }
def _section_key(row: pd.Series) -> tuple[bool, tuple[tuple[str, str], ...]]:
    """Return the hashable key used to group summary rows into sections.

    Args:
        row: One row of the attribution summary frame.

    Returns:
        A ``(is_proxy, semantics_items)`` pair. ``is_proxy`` is the row's
        ``proxy_only`` flag, with a missing value treated as ``False``.
        ``semantics_items`` holds ``(beta_column, factor_label)`` pairs in
        ``SEMANTIC_BETA_COLUMNS`` order, taken from
        ``_resolve_beta_semantics(row)``.
    """
    semantics = _resolve_beta_semantics(row)
    proxy_flag = row.get("proxy_only", False)
    # bool(nan) is True and bool(pd.NA) raises, so a missing flag must be
    # mapped to False explicitly (same effect as ``fillna(False)``).
    is_proxy = False if pd.isna(proxy_flag) else bool(proxy_flag)
    return is_proxy, tuple((key, semantics[key]) for key in SEMANTIC_BETA_COLUMNS)
def attribute_strategies( def attribute_strategies(
results_df: pd.DataFrame, results_df: pd.DataFrame,
benchmark_label: str, benchmark_label: str,
@@ -651,7 +660,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings) return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings)
def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None: def _print_attribution_section(summary_df: pd.DataFrame, title: str, semantics: dict[str, str]) -> None:
display_columns = [ display_columns = [
"strategy", "strategy",
"market", "market",
@@ -671,8 +680,7 @@ def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_label
"beta_recovery", "beta_recovery",
] ]
table = summary_df.reindex(columns=display_columns).copy() table = summary_df.reindex(columns=display_columns).copy()
del proxy_labels table = table.rename(columns=_section_beta_header_map(semantics))
table = table.rename(columns=_section_beta_header_map(summary_df))
numeric_columns = [ numeric_columns = [
column column
for column in table.columns for column in table.columns
@@ -689,22 +697,18 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
print("Factor attribution: no usable regressions were produced.") print("Factor attribution: no usable regressions were produced.")
return return
proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool)
standard_rows = summary_df.loc[~proxy_mask]
proxy_rows = summary_df.loc[proxy_mask]
print("\nFactor attribution") print("\nFactor attribution")
if not standard_rows.empty: sections: dict[tuple[bool, tuple[tuple[str, str], ...]], list[int]] = {}
for index, row in summary_df.iterrows():
sections.setdefault(_section_key(row), []).append(index)
for (is_proxy, semantics_items), row_indexes in sections.items():
section_rows = summary_df.loc[row_indexes]
title = "Proxy factor attribution" if is_proxy else "Standard factor attribution"
_print_attribution_section( _print_attribution_section(
standard_rows, section_rows,
title="Standard factor attribution", title=title,
proxy_labels=False, semantics=dict(semantics_items),
)
if not proxy_rows.empty:
_print_attribution_section(
proxy_rows,
title="Proxy factor attribution",
proxy_labels=True,
) )
print("\nInterpretation") print("\nInterpretation")
for _, row in summary_df.iterrows(): for _, row in summary_df.iterrows():

View File

@@ -923,6 +923,100 @@ class AttributionIntegrationTests(unittest.TestCase):
self.assertIn("SMB_PROXY", output) self.assertIn("SMB_PROXY", output)
self.assertNotIn(" beta_smb ", output) self.assertNotIn(" beta_smb ", output)
def test_print_attribution_summary_honors_complete_noncanonical_beta_semantics(self):
    """Custom but complete ``beta_semantics`` labels must appear in the printed summary.

    Builds one standard row and one proxy-only row, each carrying a JSON
    ``beta_semantics`` mapping for all eight beta columns, prints the summary
    to a captured stdout, and checks that the custom labels are shown while
    the ``MKT_RF`` / ``SMB_PROXY`` labels are not.
    """
    standard_labels = {
        "beta_mkt": "MARKET_EXCESS",
        "beta_smb": "SIZE",
        "beta_hml": "VALUE",
        "beta_rmw": "QUALITY",
        "beta_cma": "INVESTMENT",
        "beta_mom": "MOMENTUM",
        "beta_lowvol": "MINVOL",
        "beta_recovery": "BOUNCE",
    }
    proxy_labels = {
        "beta_mkt": "LOCAL_MARKET",
        "beta_smb": "SIZE_PROXY_CUSTOM",
        "beta_hml": "VALUE_PROXY_CUSTOM",
        "beta_rmw": "QUALITY_PROXY_CUSTOM",
        "beta_cma": "INVEST_PROXY_CUSTOM",
        "beta_mom": "TREND",
        "beta_lowvol": "DEFENSIVE",
        "beta_recovery": "RECOVERY_PROXY",
    }
    standard_row = {
        "strategy": "US Strategy",
        "market": "us",
        "model": "ff5",
        "factor_source": "external+local",
        "proxy_only": False,
        "beta_semantics": json.dumps(standard_labels),
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0004,
        "alpha_ann": 0.1008,
        "alpha_t_stat": 2.1,
        "alpha_p_value": 0.04,
        "r_squared": 0.82,
        "adj_r_squared": 0.81,
        "residual_vol_ann": 0.12,
        "beta_mkt": 1.05,
        "beta_smb": -0.20,
        "beta_hml": 0.30,
        "beta_rmw": 0.05,
        "beta_cma": np.nan,
        "beta_mom": np.nan,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    proxy_row = {
        "strategy": "CN Strategy",
        "market": "cn",
        "model": "proxy",
        "factor_source": "proxy_only",
        "proxy_only": True,
        "beta_semantics": json.dumps(proxy_labels),
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
        "alpha_daily": 0.0002,
        "alpha_ann": 0.0504,
        "alpha_t_stat": 1.5,
        "alpha_p_value": 0.12,
        "r_squared": 0.72,
        "adj_r_squared": 0.70,
        "residual_vol_ann": 0.14,
        "beta_mkt": 0.85,
        "beta_smb": -0.30,
        "beta_hml": 0.25,
        "beta_rmw": 0.10,
        "beta_cma": -0.05,
        "beta_mom": 0.20,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    summary = pd.DataFrame([standard_row, proxy_row])

    # Capture everything the summary printer writes to stdout.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(summary)
    output = captured.getvalue()

    expected_fragments = (
        "Standard factor attribution",
        "Proxy factor attribution",
        "beta_market_excess",
        "beta_size_proxy_custom",
        "MARKET_EXCESS 1.05",
        "SIZE_PROXY_CUSTOM -0.30",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, output)

    # These labels must not be used for the rows above.
    for fragment in ("MKT_RF 1.05", "SMB_PROXY -0.30"):
        self.assertNotIn(fragment, output)
def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self): def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
summary = pd.DataFrame( summary = pd.DataFrame(
[ [