Add attribution beta semantics metadata
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import http.client
|
||||
import io
|
||||
import socket
|
||||
@@ -54,6 +55,7 @@ SUMMARY_COLUMNS = [
|
||||
"model",
|
||||
"factor_source",
|
||||
"proxy_only",
|
||||
"beta_semantics",
|
||||
"start_date",
|
||||
"end_date",
|
||||
"n_obs",
|
||||
@@ -436,6 +438,32 @@ def _resolve_benchmark_symbol(benchmark: str | None) -> str:
|
||||
return benchmark
|
||||
|
||||
|
||||
def _beta_semantics_map(proxy_only: bool) -> dict[str, str]:
|
||||
return {
|
||||
"beta_mkt": "MKT" if proxy_only else "MKT_RF",
|
||||
"beta_smb": "SMB_PROXY" if proxy_only else "SMB",
|
||||
"beta_hml": "HML_PROXY" if proxy_only else "HML",
|
||||
"beta_rmw": "RMW_PROXY" if proxy_only else "RMW",
|
||||
"beta_cma": "CMA_PROXY" if proxy_only else "CMA",
|
||||
"beta_mom": "MOM",
|
||||
"beta_lowvol": "LOWVOL",
|
||||
"beta_recovery": "RECOVERY",
|
||||
}
|
||||
|
||||
|
||||
def _parse_beta_semantics(row: pd.Series) -> dict[str, str]:
|
||||
raw_value = row.get("beta_semantics")
|
||||
if isinstance(raw_value, str) and raw_value:
|
||||
try:
|
||||
parsed = json.loads(raw_value)
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
else:
|
||||
if isinstance(parsed, dict):
|
||||
return {str(key): str(value) for key, value in parsed.items()}
|
||||
return _beta_semantics_map(bool(row.get("proxy_only", False)))
|
||||
|
||||
|
||||
def attribute_strategies(
|
||||
results_df: pd.DataFrame,
|
||||
benchmark_label: str,
|
||||
@@ -507,6 +535,7 @@ def attribute_strategies(
|
||||
"model": model_name,
|
||||
"factor_source": prepared["factor_source"],
|
||||
"proxy_only": prepared["proxy_only"],
|
||||
"beta_semantics": json.dumps(_beta_semantics_map(bool(prepared["proxy_only"])), sort_keys=True),
|
||||
"start_date": regression_result["start_date"],
|
||||
"end_date": regression_result["end_date"],
|
||||
"n_obs": regression_result["n_obs"],
|
||||
@@ -580,28 +609,7 @@ def _describe_fit(r_squared: float) -> str:
|
||||
|
||||
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
||||
beta_columns = [column for column in row.index if column.startswith("beta_")]
|
||||
if bool(row.get("proxy_only", False)):
|
||||
factor_labels = {
|
||||
"beta_mkt": "MKT",
|
||||
"beta_smb": "SMB_PROXY",
|
||||
"beta_hml": "HML_PROXY",
|
||||
"beta_rmw": "RMW_PROXY",
|
||||
"beta_cma": "CMA_PROXY",
|
||||
"beta_mom": "MOM",
|
||||
"beta_lowvol": "LOWVOL",
|
||||
"beta_recovery": "RECOVERY",
|
||||
}
|
||||
else:
|
||||
factor_labels = {
|
||||
"beta_mkt": "MKT",
|
||||
"beta_smb": "SMB",
|
||||
"beta_hml": "HML",
|
||||
"beta_rmw": "RMW",
|
||||
"beta_cma": "CMA",
|
||||
"beta_mom": "MOM",
|
||||
"beta_lowvol": "LOWVOL",
|
||||
"beta_recovery": "RECOVERY",
|
||||
}
|
||||
factor_labels = _parse_beta_semantics(row)
|
||||
present = []
|
||||
for column in beta_columns:
|
||||
value = row.get(column)
|
||||
@@ -616,11 +624,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
||||
return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings)
|
||||
|
||||
|
||||
def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
||||
if summary_df.empty:
|
||||
print("Factor attribution: no usable regressions were produced.")
|
||||
return
|
||||
|
||||
def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None:
|
||||
display_columns = [
|
||||
"strategy",
|
||||
"market",
|
||||
@@ -640,7 +644,7 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
||||
"beta_recovery",
|
||||
]
|
||||
table = summary_df.reindex(columns=display_columns).copy()
|
||||
if bool(table["proxy_only"].fillna(False).all()):
|
||||
if proxy_labels:
|
||||
table = table.rename(
|
||||
columns={
|
||||
"beta_smb": "beta_smb_proxy",
|
||||
@@ -656,8 +660,32 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
||||
]
|
||||
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
|
||||
|
||||
print("\nFactor attribution")
|
||||
print(f"\n{title}")
|
||||
print(table.to_string(index=False, na_rep=""))
|
||||
|
||||
|
||||
def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
||||
if summary_df.empty:
|
||||
print("Factor attribution: no usable regressions were produced.")
|
||||
return
|
||||
|
||||
proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool)
|
||||
standard_rows = summary_df.loc[~proxy_mask]
|
||||
proxy_rows = summary_df.loc[proxy_mask]
|
||||
|
||||
print("\nFactor attribution")
|
||||
if not standard_rows.empty:
|
||||
_print_attribution_section(
|
||||
standard_rows,
|
||||
title="Standard factor attribution",
|
||||
proxy_labels=False,
|
||||
)
|
||||
if not proxy_rows.empty:
|
||||
_print_attribution_section(
|
||||
proxy_rows,
|
||||
title="Proxy factor attribution",
|
||||
proxy_labels=True,
|
||||
)
|
||||
print("\nInterpretation")
|
||||
for _, row in summary_df.iterrows():
|
||||
print(
|
||||
|
||||
Reference in New Issue
Block a user