Fix proxy attribution benchmark and labeling
This commit is contained in:
@@ -32,17 +32,18 @@ PROXY_FACTOR_COLUMNS = [
|
|||||||
"CMA_PROXY",
|
"CMA_PROXY",
|
||||||
] + EXTENSION_FACTOR_COLUMNS
|
] + EXTENSION_FACTOR_COLUMNS
|
||||||
TRADING_DAYS_PER_YEAR = 252
|
TRADING_DAYS_PER_YEAR = 252
|
||||||
|
MISSING_BENCHMARK_SENTINEL = "__missing_benchmark__"
|
||||||
SUMMARY_BETA_COLUMN_BY_FACTOR = {
|
SUMMARY_BETA_COLUMN_BY_FACTOR = {
|
||||||
"MKT_RF": "beta_mkt",
|
"MKT_RF": "beta_mkt",
|
||||||
"MKT": "beta_mkt",
|
"MKT": "beta_mkt",
|
||||||
"SMB": "beta_smb",
|
"SMB": "beta_smb",
|
||||||
"SMB_PROXY": "beta_smb",
|
"SMB_PROXY": "beta_smb_proxy",
|
||||||
"HML": "beta_hml",
|
"HML": "beta_hml",
|
||||||
"HML_PROXY": "beta_hml",
|
"HML_PROXY": "beta_hml_proxy",
|
||||||
"RMW": "beta_rmw",
|
"RMW": "beta_rmw",
|
||||||
"RMW_PROXY": "beta_rmw",
|
"RMW_PROXY": "beta_rmw_proxy",
|
||||||
"CMA": "beta_cma",
|
"CMA": "beta_cma",
|
||||||
"CMA_PROXY": "beta_cma",
|
"CMA_PROXY": "beta_cma_proxy",
|
||||||
"MOM": "beta_mom",
|
"MOM": "beta_mom",
|
||||||
"LOWVOL": "beta_lowvol",
|
"LOWVOL": "beta_lowvol",
|
||||||
"RECOVERY": "beta_recovery",
|
"RECOVERY": "beta_recovery",
|
||||||
@@ -68,6 +69,10 @@ SUMMARY_COLUMNS = [
|
|||||||
"beta_hml",
|
"beta_hml",
|
||||||
"beta_rmw",
|
"beta_rmw",
|
||||||
"beta_cma",
|
"beta_cma",
|
||||||
|
"beta_smb_proxy",
|
||||||
|
"beta_hml_proxy",
|
||||||
|
"beta_rmw_proxy",
|
||||||
|
"beta_cma_proxy",
|
||||||
"beta_mom",
|
"beta_mom",
|
||||||
"beta_lowvol",
|
"beta_lowvol",
|
||||||
"beta_recovery",
|
"beta_recovery",
|
||||||
@@ -429,6 +434,12 @@ def _select_model_names(
|
|||||||
return list(available_models)
|
return list(available_models)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_benchmark_symbol(benchmark: str | None) -> str:
|
||||||
|
if benchmark is None:
|
||||||
|
return MISSING_BENCHMARK_SENTINEL
|
||||||
|
return benchmark
|
||||||
|
|
||||||
|
|
||||||
def attribute_strategies(
|
def attribute_strategies(
|
||||||
results_df: pd.DataFrame,
|
results_df: pd.DataFrame,
|
||||||
benchmark_label: str,
|
benchmark_label: str,
|
||||||
@@ -438,10 +449,7 @@ def attribute_strategies(
|
|||||||
benchmark: str | None = None,
|
benchmark: str | None = None,
|
||||||
external_factors: pd.DataFrame | None = None,
|
external_factors: pd.DataFrame | None = None,
|
||||||
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||||
benchmark_symbol = benchmark
|
benchmark_symbol = _resolve_benchmark_symbol(benchmark)
|
||||||
if benchmark_symbol is None:
|
|
||||||
matching_columns = [column for column in price_data.columns if column in benchmark_label]
|
|
||||||
benchmark_symbol = matching_columns[0] if matching_columns else price_data.columns[-1]
|
|
||||||
|
|
||||||
extension_factors = build_extension_factors(price_data, benchmark=benchmark_symbol, market=market)
|
extension_factors = build_extension_factors(price_data, benchmark=benchmark_symbol, market=market)
|
||||||
|
|
||||||
@@ -518,6 +526,10 @@ def attribute_strategies(
|
|||||||
"beta_hml": np.nan,
|
"beta_hml": np.nan,
|
||||||
"beta_rmw": np.nan,
|
"beta_rmw": np.nan,
|
||||||
"beta_cma": np.nan,
|
"beta_cma": np.nan,
|
||||||
|
"beta_smb_proxy": np.nan,
|
||||||
|
"beta_hml_proxy": np.nan,
|
||||||
|
"beta_rmw_proxy": np.nan,
|
||||||
|
"beta_cma_proxy": np.nan,
|
||||||
"beta_mom": np.nan,
|
"beta_mom": np.nan,
|
||||||
"beta_lowvol": np.nan,
|
"beta_lowvol": np.nan,
|
||||||
"beta_recovery": np.nan,
|
"beta_recovery": np.nan,
|
||||||
@@ -575,7 +587,7 @@ def _describe_fit(r_squared: float) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
def _top_loading_descriptions(row: pd.Series, limit: int = 2) -> str:
|
||||||
beta_columns = [column for column in SUMMARY_COLUMNS if column.startswith("beta_")]
|
beta_columns = [column for column in row.index if column.startswith("beta_")]
|
||||||
present = []
|
present = []
|
||||||
for column in beta_columns:
|
for column in beta_columns:
|
||||||
value = row.get(column)
|
value = row.get(column)
|
||||||
@@ -606,11 +618,15 @@ def print_attribution_summary(summary_df: pd.DataFrame) -> None:
|
|||||||
"beta_hml",
|
"beta_hml",
|
||||||
"beta_rmw",
|
"beta_rmw",
|
||||||
"beta_cma",
|
"beta_cma",
|
||||||
|
"beta_smb_proxy",
|
||||||
|
"beta_hml_proxy",
|
||||||
|
"beta_rmw_proxy",
|
||||||
|
"beta_cma_proxy",
|
||||||
"beta_mom",
|
"beta_mom",
|
||||||
"beta_lowvol",
|
"beta_lowvol",
|
||||||
"beta_recovery",
|
"beta_recovery",
|
||||||
]
|
]
|
||||||
table = summary_df.loc[:, display_columns].copy()
|
table = summary_df.reindex(columns=display_columns).copy()
|
||||||
numeric_columns = [column for column in display_columns if column not in {"strategy", "market", "model"}]
|
numeric_columns = [column for column in display_columns if column not in {"strategy", "market", "model"}]
|
||||||
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
|
table.loc[:, numeric_columns] = table.loc[:, numeric_columns].round(4)
|
||||||
|
|
||||||
|
|||||||
@@ -655,6 +655,10 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
"beta_hml",
|
"beta_hml",
|
||||||
"beta_rmw",
|
"beta_rmw",
|
||||||
"beta_cma",
|
"beta_cma",
|
||||||
|
"beta_smb_proxy",
|
||||||
|
"beta_hml_proxy",
|
||||||
|
"beta_rmw_proxy",
|
||||||
|
"beta_cma_proxy",
|
||||||
"beta_mom",
|
"beta_mom",
|
||||||
"beta_lowvol",
|
"beta_lowvol",
|
||||||
"beta_recovery",
|
"beta_recovery",
|
||||||
@@ -667,6 +671,7 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
|
||||||
self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
|
||||||
self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
|
self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
|
||||||
|
self.assertTrue(np.isnan(summary.loc[0, "beta_smb_proxy"]))
|
||||||
self.assertTrue(np.isnan(summary.loc[0, "beta_mom"]))
|
self.assertTrue(np.isnan(summary.loc[0, "beta_mom"]))
|
||||||
|
|
||||||
self.assertListEqual(
|
self.assertListEqual(
|
||||||
@@ -704,11 +709,53 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertEqual(summary.loc[0, "model"], "proxy")
|
self.assertEqual(summary.loc[0, "model"], "proxy")
|
||||||
self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
|
self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
|
||||||
self.assertTrue(bool(summary.loc[0, "proxy_only"]))
|
self.assertTrue(bool(summary.loc[0, "proxy_only"]))
|
||||||
|
self.assertIn("beta_smb_proxy", summary.columns)
|
||||||
|
self.assertIn("beta_hml_proxy", summary.columns)
|
||||||
|
self.assertIn("beta_rmw_proxy", summary.columns)
|
||||||
|
self.assertIn("beta_cma_proxy", summary.columns)
|
||||||
|
self.assertTrue(np.isnan(summary.loc[0, "beta_smb"]))
|
||||||
|
self.assertTrue(np.isnan(summary.loc[0, "beta_hml"]))
|
||||||
|
self.assertTrue(np.isnan(summary.loc[0, "beta_rmw"]))
|
||||||
|
self.assertTrue(np.isnan(summary.loc[0, "beta_cma"]))
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
set(loadings["factor"]),
|
set(loadings["factor"]),
|
||||||
{"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
|
{"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_attribute_strategies_without_benchmark_uses_equal_weight_proxy_market(self):
|
||||||
|
dates = pd.date_range("2025-01-01", periods=320, freq="B")
|
||||||
|
prices = self._make_price_frame(dates, benchmark="000300.SS").drop(columns=["000300.SS"])
|
||||||
|
equal_weight_returns = prices.pct_change().mean(axis=1).fillna(0.0)
|
||||||
|
results = pd.DataFrame(
|
||||||
|
{
|
||||||
|
"Strategy": 100_000.0 * (1.0 + 0.0002 + 0.8 * equal_weight_returns).cumprod(),
|
||||||
|
"External Benchmark": 100_000.0 * (1.0 + 0.0001 + 0.6 * equal_weight_returns).cumprod(),
|
||||||
|
},
|
||||||
|
index=dates,
|
||||||
|
)
|
||||||
|
|
||||||
|
summary_missing, loadings_missing = attribute_strategies(
|
||||||
|
results_df=results,
|
||||||
|
benchmark_label="External Benchmark",
|
||||||
|
benchmark=None,
|
||||||
|
price_data=prices,
|
||||||
|
market="cn",
|
||||||
|
model_selection="ff5",
|
||||||
|
external_factors=None,
|
||||||
|
)
|
||||||
|
summary_explicit, loadings_explicit = attribute_strategies(
|
||||||
|
results_df=results,
|
||||||
|
benchmark_label="External Benchmark",
|
||||||
|
benchmark="MISSING_BENCHMARK",
|
||||||
|
price_data=prices,
|
||||||
|
market="cn",
|
||||||
|
model_selection="ff5",
|
||||||
|
external_factors=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
pd.testing.assert_frame_equal(summary_missing, summary_explicit, check_dtype=False)
|
||||||
|
pd.testing.assert_frame_equal(loadings_missing, loadings_explicit, check_dtype=False)
|
||||||
|
|
||||||
def test_print_attribution_summary_prints_compact_table_and_interpretation(self):
|
def test_print_attribution_summary_prints_compact_table_and_interpretation(self):
|
||||||
summary = pd.DataFrame(
|
summary = pd.DataFrame(
|
||||||
[
|
[
|
||||||
@@ -751,6 +798,50 @@ class AttributionIntegrationTests(unittest.TestCase):
|
|||||||
self.assertIn("alpha_ann", output)
|
self.assertIn("alpha_ann", output)
|
||||||
self.assertIn("Interpretation", output)
|
self.assertIn("Interpretation", output)
|
||||||
|
|
||||||
|
def test_print_attribution_summary_keeps_proxy_factor_labels_in_output(self):
|
||||||
|
summary = pd.DataFrame(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"strategy": "Strategy",
|
||||||
|
"market": "cn",
|
||||||
|
"model": "proxy",
|
||||||
|
"factor_source": "proxy_only",
|
||||||
|
"proxy_only": True,
|
||||||
|
"start_date": "2025-01-02",
|
||||||
|
"end_date": "2026-03-24",
|
||||||
|
"n_obs": 319,
|
||||||
|
"alpha_daily": 0.0002,
|
||||||
|
"alpha_ann": 0.0504,
|
||||||
|
"alpha_t_stat": 1.5,
|
||||||
|
"alpha_p_value": 0.12,
|
||||||
|
"r_squared": 0.72,
|
||||||
|
"adj_r_squared": 0.70,
|
||||||
|
"residual_vol_ann": 0.14,
|
||||||
|
"beta_mkt": 0.85,
|
||||||
|
"beta_smb": np.nan,
|
||||||
|
"beta_hml": np.nan,
|
||||||
|
"beta_rmw": np.nan,
|
||||||
|
"beta_cma": np.nan,
|
||||||
|
"beta_smb_proxy": -0.30,
|
||||||
|
"beta_hml_proxy": 0.25,
|
||||||
|
"beta_rmw_proxy": 0.10,
|
||||||
|
"beta_cma_proxy": -0.05,
|
||||||
|
"beta_mom": 0.20,
|
||||||
|
"beta_lowvol": np.nan,
|
||||||
|
"beta_recovery": np.nan,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
buffer = io.StringIO()
|
||||||
|
with contextlib.redirect_stdout(buffer):
|
||||||
|
print_attribution_summary(summary)
|
||||||
|
|
||||||
|
output = buffer.getvalue()
|
||||||
|
self.assertIn("beta_smb_proxy", output)
|
||||||
|
self.assertIn("beta_hml_proxy", output)
|
||||||
|
self.assertIn("SMB_PROXY", output)
|
||||||
|
|
||||||
def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
|
def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
|
||||||
steps = np.arange(len(dates), dtype=float)
|
steps = np.arange(len(dates), dtype=float)
|
||||||
data = {}
|
data = {}
|
||||||
|
|||||||
Reference in New Issue
Block a user