Add attribution beta semantics metadata
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import http.client
|
import http.client
|
||||||
import io
|
import io
|
||||||
import socket
|
import socket
|
||||||
@@ -54,6 +55,7 @@ SUMMARY_COLUMNS = [
|
|||||||
"model",
|
"model",
|
||||||
"factor_source",
|
"factor_source",
|
||||||
"proxy_only",
|
"proxy_only",
|
||||||
|
"beta_semantics",
|
||||||
"start_date",
|
"start_date",
|
||||||
"end_date",
|
"end_date",
|
||||||
"n_obs",
|
"n_obs",
|
||||||
@@ -436,6 +438,32 @@ def _resolve_benchmark_symbol(benchmark: str | None) -> str:
|
|||||||
return benchmark
|
return benchmark
|
||||||
|
|
||||||
|
|
||||||
|
def _beta_semantics_map(proxy_only: bool) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"beta_mkt": "MKT" if proxy_only else "MKT_RF",
|
||||||
|
"beta_smb": "SMB_PROXY" if proxy_only else "SMB",
|
||||||
|
"beta_hml": "HML_PROXY" if proxy_only else "HML",
|
||||||
|
"beta_rmw": "RMW_PROXY" if proxy_only else "RMW",
|
||||||
|
"beta_cma": "CMA_PROXY" if proxy_only else "CMA",
|
||||||
|
"beta_mom": "MOM",
|
||||||
|
"beta_lowvol": "LOWVOL",
|
||||||
|
"beta_recovery": "RECOVERY",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_beta_semantics(row: pd.Series) -> dict[str, str]:
|
||||||
|
raw_value = row.get("beta_semantics")
|
||||||
|
if isinstance(raw_value, str) and raw_value:
|
||||||
|
try:
|
||||||
|
parsed = json.loads(raw_value)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
parsed = None
|
||||||
|
else:
|
||||||
|
if isinstance(parsed, dict):
|
||||||
|
return {str(key): str(value) for key, value in parsed.items()}
|
||||||
|
return _beta_semantics_map(bool(row.get("proxy_only", False)))
|
||||||
|
|
||||||
|
|
||||||
def attribute_strategies(
|
def attribute_strategies(
|
||||||
results_df: pd.DataFrame,
|
results_df: pd.DataFrame,
|
||||||
benchmark_label: str,
|
benchmark_label: str,
|
||||||
@@ -507,6 +535,7 @@ def attribute_strategies(
|
|||||||
"model": model_name,
|
"model": model_name,
|
||||||
"factor_source": prepared["factor_source"],
|
"factor_source": prepared["factor_source"],
|
||||||
"proxy_only": prepared["proxy_only"],
|
"proxy_only": prepared["proxy_only"],
|
||||||
|
"beta_semantics": json.dumps(_beta_semantics_map(bool(prepared["proxy_only"])), sort_keys=True),
|
||||||
"start_date": regression_result["start_date"],
|
"start_date": regression_result["start_date"],
|
||||||
"end_date": regression_result["end_date"],
|
"end_date": regression_result["end_date"],
|
||||||
"n_obs": regression_result["n_obs"],
|
"n_obs": regression_result["n_obs"],
|
||||||
@@ -580,28 +609,7 @@ def _describe_fit(r_squared: float) -> str:
|
|||||||
|
|
||||||
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
||||||
beta_columns = [column for column in row.index if column.startswith("beta_")]
|
beta_columns = [column for column in row.index if column.startswith("beta_")]
|
||||||
if bool(row.get("proxy_only", False)):
|
factor_labels = _parse_beta_semantics(row)
|
||||||
factor_labels = {
|
|
||||||
"beta_mkt": "MKT",
|
|
||||||
"beta_smb": "SMB_PROXY",
|
|
||||||
"beta_hml": "HML_PROXY",
|
|
||||||
"beta_rmw": "RMW_PROXY",
|
|
||||||
"beta_cma": "CMA_PROXY",
|
|
||||||
"beta_mom": "MOM",
|
|
||||||
"beta_lowvol": "LOWVOL",
|
|
||||||
"beta_recovery": "RECOVERY",
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
factor_labels = {
|
|
||||||
"beta_mkt": "MKT",
|
|
||||||
"beta_smb": "SMB",
|
|
||||||
"beta_hml": "HML",
|
|
||||||
"beta_rmw": "RMW",
|
|
||||||
"beta_cma": "CMA",
|
|
||||||
"beta_mom": "MOM",
|
|
||||||
"beta_lowvol": "LOWVOL",
|
|
||||||
"beta_recovery": "RECOVERY",
|
|
||||||
}
|
|
||||||
present = []
|
present = []
|
||||||
for column in beta_columns:
|
for column in beta_columns:
|
||||||
value = row.get(column)
|
value = row.get(column)
|
||||||
@@ -616,11 +624,7 @@ def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
|||||||
return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings)
|
return ", ".join(f"{name} {value:.2f}" for name, value in top_loadings)
|
||||||
|
|
||||||
|
|
||||||
def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
def _print_attribution_section(summary_df: pd.DataFrame, title: str, proxy_labels: bool) -> None:
|
||||||
if summary_df.empty:
|
|
||||||
print("Factor attribution: no usable regressions were produced.")
|
|
||||||
return
|
|
||||||
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
"strategy",
|
"strategy",
|
||||||
"market",
|
"market",
|
||||||
@@ -640,7 +644,7 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
|||||||
"beta_recovery",
|
"beta_recovery",
|
||||||
]
|
]
|
||||||
table = summary_df.reindex(columns=display_columns).copy()
|
table = summary_df.reindex(columns=display_columns).copy()
|
||||||
if bool(table["proxy_only"].fillna(False).all()):
|
if proxy_labels:
|
||||||
table = table.rename(
|
table = table.rename(
|
||||||
columns={
|
columns={
|
||||||
"beta_smb": "beta_smb_proxy",
|
"beta_smb": "beta_smb_proxy",
|
||||||
@@ -656,8 +660,32 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
|||||||
]
|
]
|
||||||
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
|
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
|
||||||
|
|
||||||
print("\nFactor attribution")
|
print(f"\n{title}")
|
||||||
print(table.to_string(index=False, na_rep=""))
|
print(table.to_string(index=False, na_rep=""))
|
||||||
|
|
||||||
|
|
||||||
|
def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
||||||
|
if summary_df.empty:
|
||||||
|
print("Factor attribution: no usable regressions were produced.")
|
||||||
|
return
|
||||||
|
|
||||||
|
proxy_mask = summary_df["proxy_only"].fillna(False).astype(bool)
|
||||||
|
standard_rows = summary_df.loc[~proxy_mask]
|
||||||
|
proxy_rows = summary_df.loc[proxy_mask]
|
||||||
|
|
||||||
|
print("\nFactor attribution")
|
||||||
|
if not standard_rows.empty:
|
||||||
|
_print_attribution_section(
|
||||||
|
standard_rows,
|
||||||
|
title="Standard factor attribution",
|
||||||
|
proxy_labels=False,
|
||||||
|
)
|
||||||
|
if not proxy_rows.empty:
|
||||||
|
_print_attribution_section(
|
||||||
|
proxy_rows,
|
||||||
|
title="Proxy factor attribution",
|
||||||
|
proxy_labels=True,
|
||||||
|
)
|
||||||
print("\nInterpretation")
|
print("\nInterpretation")
|
||||||
for _, row in summary_df.iterrows():
|
for _, row in summary_df.iterrows():
|
||||||
print(
|
print(
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import http.client
|
import http.client
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import json
|
||||||
import io
|
import io
|
||||||
import socket
|
import socket
|
||||||
import ssl
|
import ssl
|
||||||
@@ -640,6 +641,7 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
"model",
|
"model",
|
||||||
"factor_source",
|
"factor_source",
|
||||||
"proxy_only",
|
"proxy_only",
|
||||||
|
"beta_semantics",
|
||||||
"start_date",
|
"start_date",
|
||||||
"end_date",
|
"end_date",
|
||||||
"n_obs",
|
"n_obs",
|
||||||
@@ -664,6 +666,19 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertEqual(summary.loc[0, "model"], "ff5")
|
self.assertEqual(summary.loc[0, "model"], "ff5")
|
||||||
self.assertEqual(summary.loc[0, "factor_source"], "external+local")
|
self.assertEqual(summary.loc[0, "factor_source"], "external+local")
|
||||||
self.assertFalse(bool(summary.loc[0, "proxy_only"]))
|
self.assertFalse(bool(summary.loc[0, "proxy_only"]))
|
||||||
|
self.assertEqual(
|
||||||
|
json.loads(summary.loc[0, "beta_semantics"]),
|
||||||
|
{
|
||||||
|
"beta_mkt": "MKT_RF",
|
||||||
|
"beta_smb": "SMB",
|
||||||
|
"beta_hml": "HML",
|
||||||
|
"beta_rmw": "RMW",
|
||||||
|
"beta_cma": "CMA",
|
||||||
|
"beta_mom": "MOM",
|
||||||
|
"beta_lowvol": "LOWVOL",
|
||||||
|
"beta_recovery": "RECOVERY",
|
||||||
|
},
|
||||||
|
)
|
||||||
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
|
||||||
self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
|
||||||
self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
|
||||||
@@ -704,6 +719,19 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertEqual(summary.loc[0, "model"], "proxy")
|
self.assertEqual(summary.loc[0, "model"], "proxy")
|
||||||
self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
|
self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
|
||||||
self.assertTrue(bool(summary.loc[0, "proxy_only"]))
|
self.assertTrue(bool(summary.loc[0, "proxy_only"]))
|
||||||
|
self.assertEqual(
|
||||||
|
json.loads(summary.loc[0, "beta_semantics"]),
|
||||||
|
{
|
||||||
|
"beta_mkt": "MKT",
|
||||||
|
"beta_smb": "SMB_PROXY",
|
||||||
|
"beta_hml": "HML_PROXY",
|
||||||
|
"beta_rmw": "RMW_PROXY",
|
||||||
|
"beta_cma": "CMA_PROXY",
|
||||||
|
"beta_mom": "MOM",
|
||||||
|
"beta_lowvol": "LOWVOL",
|
||||||
|
"beta_recovery": "RECOVERY",
|
||||||
|
},
|
||||||
|
)
|
||||||
self.assertNotIn("beta_smb_proxy", summary.columns)
|
self.assertNotIn("beta_smb_proxy", summary.columns)
|
||||||
self.assertNotIn("beta_hml_proxy", summary.columns)
|
self.assertNotIn("beta_hml_proxy", summary.columns)
|
||||||
self.assertNotIn("beta_rmw_proxy", summary.columns)
|
self.assertNotIn("beta_rmw_proxy", summary.columns)
|
||||||
@@ -716,6 +744,13 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
set(loadings["factor"]),
|
set(loadings["factor"]),
|
||||||
{"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
|
{"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
|
||||||
)
|
)
|
||||||
|
loadings_by_factor = loadings.set_index("factor")["beta"]
|
||||||
|
semantics = json.loads(summary.loc[0, "beta_semantics"])
|
||||||
|
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], loadings_by_factor[semantics["beta_mkt"]], places=10)
|
||||||
|
self.assertAlmostEqual(summary.loc[0, "beta_smb"], loadings_by_factor[semantics["beta_smb"]], places=10)
|
||||||
|
self.assertAlmostEqual(summary.loc[0, "beta_hml"], loadings_by_factor[semantics["beta_hml"]], places=10)
|
||||||
|
self.assertAlmostEqual(summary.loc[0, "beta_rmw"], loadings_by_factor[semantics["beta_rmw"]], places=10)
|
||||||
|
self.assertAlmostEqual(summary.loc[0, "beta_cma"], loadings_by_factor[semantics["beta_cma"]], places=10)
|
||||||
|
|
||||||
def test_attribute_strategies_without_benchmark_uses_equal_weight_proxy_market(self):
|
def test_attribute_strategies_without_benchmark_uses_equal_weight_proxy_market(self):
|
||||||
dates = pd.date_range("2025-01-01", periods=320, freq="B")
|
dates = pd.date_range("2025-01-01", periods=320, freq="B")
|
||||||
@@ -802,6 +837,18 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
"model": "proxy",
|
"model": "proxy",
|
||||||
"factor_source": "proxy_only",
|
"factor_source": "proxy_only",
|
||||||
"proxy_only": True,
|
"proxy_only": True,
|
||||||
|
"beta_semantics": json.dumps(
|
||||||
|
{
|
||||||
|
"beta_mkt": "MKT",
|
||||||
|
"beta_smb": "SMB_PROXY",
|
||||||
|
"beta_hml": "HML_PROXY",
|
||||||
|
"beta_rmw": "RMW_PROXY",
|
||||||
|
"beta_cma": "CMA_PROXY",
|
||||||
|
"beta_mom": "MOM",
|
||||||
|
"beta_lowvol": "LOWVOL",
|
||||||
|
"beta_recovery": "RECOVERY",
|
||||||
|
}
|
||||||
|
),
|
||||||
"start_date": "2025-01-02",
|
"start_date": "2025-01-02",
|
||||||
"end_date": "2026-03-24",
|
"end_date": "2026-03-24",
|
||||||
"n_obs": 319,
|
"n_obs": 319,
|
||||||
@@ -834,6 +881,96 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertIn("SMB_PROXY", output)
|
self.assertIn("SMB_PROXY", output)
|
||||||
self.assertNotIn(" beta_smb ", output)
|
self.assertNotIn(" beta_smb ", output)
|
||||||
|
|
||||||
|
def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
    """A frame mixing standard and proxy rows prints one table per kind."""
    # Both fixture rows share the same regression window.
    window = {
        "start_date": "2025-01-02",
        "end_date": "2026-03-24",
        "n_obs": 319,
    }
    standard_row = {
        "strategy": "US Strategy",
        "market": "us",
        "model": "ff5",
        "factor_source": "external+local",
        "proxy_only": False,
        "beta_semantics": json.dumps(
            {
                "beta_mkt": "MKT_RF",
                "beta_smb": "SMB",
                "beta_hml": "HML",
                "beta_rmw": "RMW",
                "beta_cma": "CMA",
                "beta_mom": "MOM",
                "beta_lowvol": "LOWVOL",
                "beta_recovery": "RECOVERY",
            }
        ),
        **window,
        "alpha_daily": 0.0004,
        "alpha_ann": 0.1008,
        "alpha_t_stat": 2.1,
        "alpha_p_value": 0.04,
        "r_squared": 0.82,
        "adj_r_squared": 0.81,
        "residual_vol_ann": 0.12,
        "beta_mkt": 1.05,
        "beta_smb": -0.20,
        "beta_hml": 0.30,
        "beta_rmw": 0.05,
        "beta_cma": np.nan,
        "beta_mom": np.nan,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    proxy_row = {
        "strategy": "CN Strategy",
        "market": "cn",
        "model": "proxy",
        "factor_source": "proxy_only",
        "proxy_only": True,
        "beta_semantics": json.dumps(
            {
                "beta_mkt": "MKT",
                "beta_smb": "SMB_PROXY",
                "beta_hml": "HML_PROXY",
                "beta_rmw": "RMW_PROXY",
                "beta_cma": "CMA_PROXY",
                "beta_mom": "MOM",
                "beta_lowvol": "LOWVOL",
                "beta_recovery": "RECOVERY",
            }
        ),
        **window,
        "alpha_daily": 0.0002,
        "alpha_ann": 0.0504,
        "alpha_t_stat": 1.5,
        "alpha_p_value": 0.12,
        "r_squared": 0.72,
        "adj_r_squared": 0.70,
        "residual_vol_ann": 0.14,
        "beta_mkt": 0.85,
        "beta_smb": -0.30,
        "beta_hml": 0.25,
        "beta_rmw": 0.10,
        "beta_cma": -0.05,
        "beta_mom": 0.20,
        "beta_lowvol": np.nan,
        "beta_recovery": np.nan,
    }
    mixed_summary = pd.DataFrame([standard_row, proxy_row])

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        print_attribution_summary(mixed_summary)
    printed = captured.getvalue()

    # Both section titles must appear, plus the proxy-renamed column and the
    # plain column (the trailing space keeps "beta_smb_proxy" from matching).
    for expected_fragment in (
        "Standard factor attribution",
        "Proxy factor attribution",
        "beta_smb_proxy",
        "beta_smb ",
    ):
        self.assertIn(expected_fragment, printed)
|
||||||
|
|
||||||
def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
|
def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
|
||||||
steps = np.arange(len(dates), dtype=float)
|
steps = np.arange(len(dates), dtype=float)
|
||||||
data = {}
|
data = {}
|
||||||
|
|||||||
Reference in New Issue
Block a user