Harden attribution beta semantics fallback
This commit is contained in:
@@ -86,6 +86,16 @@ LOADING_COLUMNS = [
|
||||
"t_stat",
|
||||
"p_value",
|
||||
]
|
||||
# Beta columns that a stored ``beta_semantics`` mapping must cover, key for
# key, before ``_resolve_beta_semantics`` will accept it.
SEMANTIC_BETA_COLUMNS = [
    "beta_mkt",
    "beta_smb",
    "beta_hml",
    "beta_rmw",
    "beta_cma",
    "beta_mom",
    "beta_lowvol",
    "beta_recovery",
]
|
||||
|
||||
|
||||
class ExternalFactorFormatError(ValueError):
|
||||
@@ -451,17 +461,34 @@ def _beta_semantics_map(proxy_only: bool) -> dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def _resolve_beta_semantics(row: pd.Series) -> dict[str, str]:
    """Return the beta-column -> factor-name mapping to use for *row*.

    The canonical mapping from ``_beta_semantics_map`` (selected by the
    row's ``proxy_only`` flag) is the fallback. The JSON text stored in the
    row's ``beta_semantics`` field is returned only when it decodes to a
    dict whose keys are exactly ``SEMANTIC_BETA_COLUMNS`` and whose
    stringified contents equal the canonical mapping. A missing, empty,
    non-string, undecodable, non-dict, or mismatching value yields the
    canonical mapping instead.
    """
    canonical = _beta_semantics_map(bool(row.get("proxy_only", False)))

    raw_value = row.get("beta_semantics")
    if not (isinstance(raw_value, str) and raw_value):
        return canonical

    try:
        parsed = json.loads(raw_value)
    except json.JSONDecodeError:
        # Corrupt stored semantics must not break reporting; fall back.
        return canonical
    if not isinstance(parsed, dict):
        return canonical

    # Normalise keys and values to ``str`` before comparing so that JSON
    # scalars of other types cannot leak into the returned mapping.
    parsed_mapping = {str(key): str(value) for key, value in parsed.items()}
    if set(parsed_mapping) == set(SEMANTIC_BETA_COLUMNS) and parsed_mapping == canonical:
        return parsed_mapping
    return canonical
|
||||
|
||||
|
||||
def _section_beta_header_map(summary_df: pd.DataFrame) -> dict[str, str]:
    """Build the column-rename map for the beta headers of one section.

    The semantics resolved from the first row of *summary_df* are applied to
    the whole section. Each beta column maps to ``"beta_<factor>"`` where
    ``<factor>`` is the lower-cased factor name, with ``mkt_rf`` shortened
    to ``mkt``. An empty frame yields an empty map.
    """
    if summary_df.empty:
        return {}

    semantics = _resolve_beta_semantics(summary_df.iloc[0])
    header_map: dict[str, str] = {}
    for beta_column, factor_name in semantics.items():
        suffix = factor_name.lower()
        if suffix == "mkt_rf":
            # The market factor is displayed under the short "mkt" suffix.
            suffix = "mkt"
        header_map[beta_column] = f"beta_{suffix}"
    return header_map
|
||||
|
||||
|
||||
def attribute_strategies(
|
||||
@@ -609,7 +636,7 @@ def _describe_fit(r_squared: float) -> str:
|
||||
|
||||
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
||||
beta_columns = [column for column in row.index if column.startswith("beta_")]
|
||||
factor_labels = _parse_beta_semantics(row)
|
||||
factor_labels = _resolve_beta_semantics(row)
|
||||
present = []
|
||||
for column in beta_columns:
|
||||
value = row.get(column)
|
||||
@@ -644,15 +671,8 @@ def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_label
|
||||
"beta_recovery",
|
||||
]
|
||||
table = summary_df.reindex(columns=display_columns).copy()
|
||||
if proxy_labels:
|
||||
table = table.rename(
|
||||
columns={
|
||||
"beta_smb": "beta_smb_proxy",
|
||||
"beta_hml": "beta_hml_proxy",
|
||||
"beta_rmw": "beta_rmw_proxy",
|
||||
"beta_cma": "beta_cma_proxy",
|
||||
}
|
||||
)
|
||||
del proxy_labels
|
||||
table = table.rename(columns=_section_beta_header_map(summary_df))
|
||||
numeric_columns = [
|
||||
column
|
||||
for column in table.columns
|
||||
|
||||
Reference in New Issue
Block a user