quant/tests/test_factor_attribution.py

import http.client
import contextlib
import json
import io
import socket
import ssl
import tempfile
import unittest
import zipfile
from pathlib import Path
from urllib.error import URLError
from unittest import mock

import numpy as np
import pandas as pd

from factor_attribution import (
    ExternalFactorDownloadError,
    ExternalFactorFormatError,
    KEN_FRENCH_DAILY_FF5_ZIP_URL,
    _download_kf_zip_bytes,
    _parse_kf_daily_csv,
    attribute_strategies,
    build_extension_factors,
    build_proxy_core_factors,
    export_attribution,
    load_external_us_factors,
    print_attribution_summary,
    prepare_factor_models,
    run_factor_regression,
)


class ExternalFactorLoaderTests(unittest.TestCase):
    def test_download_kf_zip_bytes_fetches_official_ken_french_zip(self):
        response = mock.MagicMock()
        response.read.return_value = b"zip-bytes"
        response.__enter__.return_value = response
        response.__exit__.return_value = False

        with mock.patch("factor_attribution.urlopen", return_value=response) as mocked_urlopen:
            raw_bytes = _download_kf_zip_bytes()

        self.assertEqual(raw_bytes, b"zip-bytes")
        request = mocked_urlopen.call_args.args[0]
        self.assertEqual(request.full_url, KEN_FRENCH_DAILY_FF5_ZIP_URL)
        self.assertEqual(mocked_urlopen.call_args.kwargs["timeout"], 30)

    def test_download_kf_zip_bytes_wraps_transport_errors(self):
        for error in (
            URLError("boom"),
            TimeoutError("timed out"),
            ConnectionError("conn reset"),
            socket.timeout("socket timed out"),
            socket.gaierror("dns failed"),
            ssl.SSLError("tls failed"),
        ):
            with self.subTest(error_type=type(error).__name__):
                with mock.patch("factor_attribution.urlopen", side_effect=error):
                    with self.assertRaises(ExternalFactorDownloadError):
                        _download_kf_zip_bytes()

    def test_download_kf_zip_bytes_wraps_incomplete_read_errors(self):
        response = mock.MagicMock()
        response.read.side_effect = http.client.IncompleteRead(b"partial", 10)
        response.__enter__.return_value = response
        response.__exit__.return_value = False

        with mock.patch("factor_attribution.urlopen", return_value=response):
            with self.assertRaises(ExternalFactorDownloadError):
                _download_kf_zip_bytes()

    def test_load_external_us_factors_parses_percent_values_and_dates_from_zip_payload(self):
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "20260105,-0.20,0.10,0.30,-0.15,0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                factors = load_external_us_factors(cache_dir=Path(tmpdir))

        self.assertListEqual(
            list(factors.columns),
            ["MKT_RF", "SMB", "HML", "RMW", "CMA", "RF"],
        )
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)
        self.assertAlmostEqual(factors.iloc[0]["RF"], 0.0002)
        self.assertEqual(str(factors.index[0].date()), "2026-01-02")

    def test_load_external_us_factors_falls_back_to_cache_when_download_fails(self):
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch("factor_attribution.urlopen", side_effect=socket.gaierror("dns failed")):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_falls_back_to_cache_when_download_read_is_incomplete(self):
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )
        response = mock.MagicMock()
        response.read.side_effect = http.client.IncompleteRead(b"partial", 10)
        response.__enter__.return_value = response
        response.__exit__.return_value = False

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch("factor_attribution.urlopen", return_value=response):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_falls_back_to_cache_when_http_status_line_is_bad(self):
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch("factor_attribution.urlopen", side_effect=http.client.BadStatusLine("HTTP/1.1 ???")):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_parse_kf_daily_csv_raises_external_factor_format_error_for_missing_header(self):
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            "not the expected file format\n20260102,1.00\n",
        )

        with self.assertRaises(ExternalFactorFormatError):
            _parse_kf_daily_csv(zip_bytes)

    def test_load_external_us_factors_warns_and_falls_back_to_cache_when_source_format_is_invalid(self):
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            malformed_zip_bytes = self._make_zip_bytes(
                "F-F_Research_Data_5_Factors_2x3_daily.csv",
                "not the expected file format\n20260102,1.00\n",
            )
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=malformed_zip_bytes,
            ):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_warns_and_falls_back_to_cache_when_zip_is_invalid(self):
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=b"not-a-zip-file",
            ):
                with self.assertWarnsRegex(UserWarning, "cached data"):
                    factors = load_external_us_factors(cache_dir=cache_dir)

        self.assertEqual(len(factors), 1)
        self.assertAlmostEqual(factors.iloc[0]["MKT_RF"], 0.01)

    def test_load_external_us_factors_surfaces_cache_write_failures(self):
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                with mock.patch("pandas.DataFrame.to_csv", side_effect=OSError("disk full")):
                    with self.assertRaises(OSError):
                        load_external_us_factors(cache_dir=Path(tmpdir))

    def test_load_external_us_factors_does_not_swallow_unrelated_local_failures(self):
        csv_text = (
            "This line is ignored\n"
            ",Mkt-RF,SMB,HML,RMW,CMA,RF\n"
            "20260102,1.00,0.50,-0.25,0.10,-0.05,0.02\n"
            "\n"
        )
        zip_bytes = self._make_zip_bytes(
            "F-F_Research_Data_5_Factors_2x3_daily.csv",
            csv_text,
        )
        cached = pd.DataFrame(
            {
                "MKT_RF": [0.01],
                "SMB": [0.0],
                "HML": [0.0],
                "RMW": [0.0],
                "CMA": [0.0],
                "RF": [0.0001],
            },
            index=pd.to_datetime(["2026-01-02"]),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            cache_dir = Path(tmpdir)
            cached.to_csv(cache_dir / "ff5_us_daily.csv")
            with mock.patch(
                "factor_attribution._download_kf_zip_bytes",
                return_value=zip_bytes,
            ):
                with mock.patch(
                    "factor_attribution._parse_kf_daily_csv",
                    side_effect=RuntimeError("unexpected local bug"),
                ):
                    with self.assertRaises(RuntimeError):
                        load_external_us_factors(cache_dir=cache_dir)

    def _make_zip_bytes(self, filename: str, contents: str) -> bytes:
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, mode="w") as archive:
            archive.writestr(filename, contents)
        return buffer.getvalue()


class LocalFactorConstructionTests(unittest.TestCase):
    def test_build_extension_factors_returns_expected_columns_with_non_null_values_after_warmup(self):
        prices = self._make_price_frame(benchmark="SPY")

        factors = build_extension_factors(prices, benchmark="SPY", market="us")

        self.assertListEqual(list(factors.columns), ["MOM", "LOWVOL", "RECOVERY"])
        self.assertTrue(factors.iloc[260:].notna().all().all())
        self.assertGreater(factors.iloc[260:].abs().sum().sum(), 0.0)

    def test_build_proxy_core_factors_returns_expected_columns_with_non_null_values_after_warmup(self):
        prices = self._make_price_frame(benchmark="000300.SS")

        factors = build_proxy_core_factors(prices, benchmark="000300.SS", market="cn")

        self.assertListEqual(
            list(factors.columns),
            ["MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"],
        )
        self.assertTrue(factors.iloc[260:].notna().all().all())
        self.assertGreater(factors.iloc[260:].abs().sum().sum(), 0.0)

    def test_build_extension_factors_does_not_use_future_prices(self):
        prices = self._make_price_frame(benchmark="SPY")
        mutated = prices.copy()
        future_start = prices.index[280]
        mutated.loc[future_start:, "A"] = mutated.loc[future_start:, "A"] * 1.8
        mutated.loc[future_start:, "B"] = mutated.loc[future_start:, "B"] * 0.4

        original = build_extension_factors(prices, benchmark="SPY", market="us")
        changed = build_extension_factors(mutated, benchmark="SPY", market="us")

        comparison_end = prices.index[279]
        pd.testing.assert_frame_equal(original.loc[:comparison_end], changed.loc[:comparison_end])
        self.assertGreater(
            (original.loc[future_start:] - changed.loc[future_start:]).abs().sum().sum(),
            0.0,
        )

    def test_build_proxy_core_factors_market_branch_does_not_use_future_benchmark_prices(self):
        prices = self._make_price_frame(benchmark="000300.SS")
        mutated = prices.copy()
        future_start = prices.index[280]
        mutated.loc[future_start:, "000300.SS"] = mutated.loc[future_start:, "000300.SS"] * 1.4

        original = build_proxy_core_factors(prices, benchmark="000300.SS", market="cn")
        changed = build_proxy_core_factors(mutated, benchmark="000300.SS", market="cn")

        comparison_end = prices.index[279]
        pd.testing.assert_series_equal(
            original.loc[:comparison_end, "MKT"],
            changed.loc[:comparison_end, "MKT"],
            check_names=False,
        )
        proxy_columns = ["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]
        pd.testing.assert_frame_equal(
            original.loc[:, proxy_columns],
            changed.loc[:, proxy_columns],
        )
        self.assertGreater(
            (original.loc[future_start:, "MKT"] - changed.loc[future_start:, "MKT"]).abs().sum(),
            0.0,
        )

    def test_build_proxy_core_factors_proxy_columns_do_not_use_future_stock_prices(self):
        prices = self._make_price_frame(benchmark="000300.SS")
        mutated = prices.copy()
        future_start = prices.index[280]
        mutated.loc[future_start:, "C"] = mutated.loc[future_start:, "C"] * 0.35
        mutated.loc[future_start:, "D"] = mutated.loc[future_start:, "D"] * 1.6

        original = build_proxy_core_factors(prices, benchmark="000300.SS", market="cn")
        changed = build_proxy_core_factors(mutated, benchmark="000300.SS", market="cn")

        comparison_end = prices.index[279]
        proxy_columns = ["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]
        pd.testing.assert_frame_equal(
            original.loc[:comparison_end, proxy_columns],
            changed.loc[:comparison_end, proxy_columns],
        )
        self.assertGreater(
            (original.loc[future_start:, proxy_columns] - changed.loc[future_start:, proxy_columns]).abs().sum().sum(),
            0.0,
        )

    def test_build_proxy_core_factors_falls_back_to_equal_weight_market_when_benchmark_missing(self):
        prices_with_benchmark = self._make_price_frame(benchmark="CN_BENCH")
        prices = prices_with_benchmark.drop(columns=["CN_BENCH"])

        factors = build_proxy_core_factors(prices, benchmark="000300.SS", market="cn")
        reference = build_proxy_core_factors(prices_with_benchmark, benchmark="CN_BENCH", market="cn")

        expected_market = prices.pct_change().mean(axis=1)
        pd.testing.assert_series_equal(factors["MKT"], expected_market, check_names=False)
        self.assertListEqual(
            list(factors.columns),
            ["MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"],
        )
        self.assertTrue(factors.iloc[260:][["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]].notna().all().all())
        self.assertGreater(
            factors.iloc[260:][["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]].abs().sum().sum(),
            0.0,
        )
        pd.testing.assert_frame_equal(
            factors[["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]],
            reference[["SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY"]],
        )

    def _make_price_frame(self, benchmark: str) -> pd.DataFrame:
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        steps = np.arange(len(dates), dtype=float)
        symbols = [
            ("A", 45.0, 0.0006, 0.030, 19.0, 0.1),
            ("B", 60.0, 0.0003, 0.025, 23.0, 0.8),
            ("C", 75.0, -0.0002, 0.035, 17.0, 1.4),
            ("D", 90.0, 0.0008, 0.020, 29.0, 0.5),
            ("E", 55.0, -0.0001, 0.028, 31.0, 1.9),
            ("F", 70.0, 0.0005, 0.032, 21.0, 2.5),
        ]
        data = {}
        for symbol, base, drift, amplitude, frequency, phase in symbols:
            log_path = drift * steps + amplitude * np.sin(steps / frequency + phase)
            data[symbol] = base * np.exp(log_path)

        benchmark_path = 0.0004 * steps + 0.018 * np.sin(steps / 27.0 + 0.3)
        data[benchmark] = 250.0 * np.exp(benchmark_path)
        return pd.DataFrame(data, index=dates)


class RegressionTests(unittest.TestCase):
    def test_run_factor_regression_recovers_known_coefficients(self):
        dates = pd.date_range("2024-01-01", periods=300, freq="B")
        angles = np.linspace(0.0, 18.0, len(dates))
        factors = pd.DataFrame(
            {
                "MKT_RF": 0.012 * np.sin(angles),
                "SMB": 0.007 * np.cos(angles * 0.7) + np.linspace(-0.002, 0.003, len(dates)),
                "RF": np.full(len(dates), 0.0001),
            },
            index=dates,
        )
        factors.loc[dates[:4], "SMB"] = np.nan

        strategy = (
            0.0005
            + 1.2 * factors["MKT_RF"]
            + 0.4 * factors["SMB"]
            + factors["RF"]
        )

        result = run_factor_regression(
            strategy,
            factors,
            factor_cols=["MKT_RF", "SMB"],
            risk_free_col="RF",
        )

        self.assertAlmostEqual(result["alpha_daily"], 0.0005, places=6)
        self.assertAlmostEqual(result["betas"]["MKT_RF"], 1.2, places=6)
        self.assertAlmostEqual(result["betas"]["SMB"], 0.4, places=6)
        self.assertGreater(result["r_squared"], 0.999999)
        self.assertEqual(result["start_date"], "2024-01-05")
        self.assertEqual(result["end_date"], "2025-02-21")
        self.assertEqual(result["n_obs"], 296)

    def test_run_factor_regression_rejects_underdetermined_designs(self):
        dates = pd.date_range("2024-01-01", periods=2, freq="B")
        factors = pd.DataFrame(
            {
                "MKT_RF": [0.01, -0.02],
                "SMB": [0.005, 0.004],
            },
            index=dates,
        )
        strategy = pd.Series([0.012, -0.018], index=dates)

        with self.assertRaisesRegex(ValueError, "Insufficient observations"):
            run_factor_regression(strategy, factors, factor_cols=["MKT_RF", "SMB"])

    def test_run_factor_regression_allows_square_full_rank_design_without_inference(self):
        dates = pd.date_range("2024-01-01", periods=3, freq="B")
        factors = pd.DataFrame(
            {
                "MKT_RF": [0.0, 1.0, 0.0],
                "SMB": [0.0, 0.0, 1.0],
            },
            index=dates,
        )
        strategy = pd.Series([0.0005, 1.2005, -0.3995], index=dates)

        result = run_factor_regression(strategy, factors, factor_cols=["MKT_RF", "SMB"])

        self.assertAlmostEqual(result["alpha_daily"], 0.0005, places=10)
        self.assertAlmostEqual(result["betas"]["MKT_RF"], 1.2, places=10)
        self.assertAlmostEqual(result["betas"]["SMB"], -0.4, places=10)
        self.assertEqual(result["r_squared"], 1.0)
        self.assertTrue(np.isnan(result["alpha_t_stat"]))
        self.assertTrue(np.isnan(result["alpha_p_value"]))
        self.assertTrue(np.isnan(result["t_stats"]["MKT_RF"]))
        self.assertTrue(np.isnan(result["t_stats"]["SMB"]))
        self.assertTrue(np.isnan(result["p_values"]["MKT_RF"]))
        self.assertTrue(np.isnan(result["p_values"]["SMB"]))
        self.assertTrue(np.isnan(result["adj_r_squared"]))
        self.assertAlmostEqual(result["residual_vol_ann"], 0.0, places=12)

    def test_run_factor_regression_single_observation_intercept_only_has_zero_residual_vol(self):
        dates = pd.date_range("2024-01-01", periods=1, freq="B")
        factors = pd.DataFrame(index=dates)
        strategy = pd.Series([0.0015], index=dates)

        result = run_factor_regression(strategy, factors, factor_cols=[])

        self.assertAlmostEqual(result["alpha_daily"], 0.0015, places=12)
        self.assertEqual(result["betas"], {})
        self.assertEqual(result["t_stats"], {})
        self.assertEqual(result["p_values"], {})
        self.assertEqual(result["r_squared"], 0.0)
        self.assertTrue(np.isnan(result["alpha_t_stat"]))
        self.assertTrue(np.isnan(result["alpha_p_value"]))
        self.assertTrue(np.isnan(result["adj_r_squared"]))
        self.assertEqual(result["n_obs"], 1)
        self.assertAlmostEqual(result["residual_vol_ann"], 0.0, places=12)

    def test_run_factor_regression_rejects_rank_deficient_designs(self):
        dates = pd.date_range("2024-01-01", periods=6, freq="B")
        market = np.array([0.01, -0.02, 0.015, 0.005, -0.01, 0.02])
        factors = pd.DataFrame(
            {
                "MKT_RF": market,
                "SMB": market * 2.0,
            },
            index=dates,
        )
        strategy = pd.Series(0.0005 + 1.0 * factors["MKT_RF"] + 0.5 * factors["SMB"], index=dates)

        with self.assertRaisesRegex(ValueError, "rank-deficient"):
            run_factor_regression(strategy, factors, factor_cols=["MKT_RF", "SMB"])

    def test_prepare_factor_models_uses_proxy_family_without_external_us_factors(self):
        dates = pd.date_range("2024-01-01", periods=5, freq="B")
        extension = pd.DataFrame(
            {
                "MOM": np.linspace(0.001, 0.005, len(dates)),
                "LOWVOL": np.linspace(-0.002, 0.002, len(dates)),
                "RECOVERY": np.linspace(0.003, -0.001, len(dates)),
            },
            index=dates,
        )
        proxy = pd.DataFrame(
            {
                "MKT": np.linspace(-0.01, 0.01, len(dates)),
                "SMB_PROXY": np.linspace(0.002, 0.004, len(dates)),
                "HML_PROXY": np.linspace(-0.003, 0.001, len(dates)),
                "RMW_PROXY": np.linspace(0.005, 0.001, len(dates)),
                "CMA_PROXY": np.linspace(-0.004, -0.002, len(dates)),
            },
            index=dates,
        )

        prepared = prepare_factor_models(
            market="us",
            extension_factors=extension,
            proxy_factors=proxy,
            external_factors=None,
        )

        self.assertEqual(prepared["factor_source"], "proxy_only")
        self.assertIsNone(prepared["risk_free_col"])
        self.assertListEqual(list(prepared["models"]), ["proxy"])
        self.assertListEqual(
            prepared["models"]["proxy"],
            ["MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"],
        )
        self.assertListEqual(
            list(prepared["factor_frame"].columns),
            ["MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"],
        )


class AttributionIntegrationTests(unittest.TestCase):
    def test_attribute_strategies_exports_standard_model_summary_and_loadings(self):
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        angles = np.linspace(0.0, 24.0, len(dates))
        factors = pd.DataFrame(
            {
                "MKT_RF": 0.010 * np.sin(angles),
                "SMB": 0.006 * np.cos(angles * 0.7),
                "HML": 0.004 * np.sin(angles * 1.3 + 0.4),
                "RMW": 0.003 * np.cos(angles * 1.1 + 0.2),
                "CMA": 0.002 * np.sin(angles * 0.5 + 0.8),
                "RF": np.full(len(dates), 0.0001),
            },
            index=dates,
        )
        strategy_returns = (
            0.0004
            + 1.10 * factors["MKT_RF"]
            - 0.25 * factors["SMB"]
            + 0.35 * factors["HML"]
            + 0.10 * factors["RMW"]
            - 0.05 * factors["CMA"]
            + factors["RF"]
        )
        benchmark_returns = 0.95 * factors["MKT_RF"] + factors["RF"]
        results = pd.DataFrame(
            {
                "Strategy": 100_000.0 * (1.0 + strategy_returns).cumprod(),
                "SPY (Benchmark)": 100_000.0 * (1.0 + benchmark_returns).cumprod(),
            },
            index=dates,
        )
        prices = self._make_price_frame(dates, benchmark="SPY")

        with tempfile.TemporaryDirectory() as tmpdir:
            summary, loadings = attribute_strategies(
                results_df=results,
                benchmark_label="SPY (Benchmark)",
                benchmark="SPY",
                price_data=prices,
                market="us",
                model_selection="ff5",
                external_factors=factors,
            )
            export_attribution(summary, loadings, tmpdir)

            self.assertTrue((Path(tmpdir) / "summary.csv").exists())
            self.assertTrue((Path(tmpdir) / "loadings.csv").exists())

            exported_summary = pd.read_csv(Path(tmpdir) / "summary.csv")
            exported_loadings = pd.read_csv(Path(tmpdir) / "loadings.csv")

        self.assertEqual(len(summary), 1)
        self.assertListEqual(
            list(summary.columns),
            [
                "strategy",
                "market",
                "model",
                "factor_source",
                "proxy_only",
                "beta_semantics",
                "start_date",
                "end_date",
                "n_obs",
                "alpha_daily",
                "alpha_ann",
                "alpha_t_stat",
                "alpha_p_value",
                "r_squared",
                "adj_r_squared",
                "residual_vol_ann",
                "beta_mkt",
                "beta_smb",
                "beta_hml",
                "beta_rmw",
                "beta_cma",
                "beta_mom",
                "beta_lowvol",
                "beta_recovery",
            ],
        )
        self.assertEqual(summary.loc[0, "strategy"], "Strategy")
        self.assertEqual(summary.loc[0, "model"], "ff5")
        self.assertEqual(summary.loc[0, "factor_source"], "external+local")
        self.assertFalse(bool(summary.loc[0, "proxy_only"]))
        self.assertEqual(
            json.loads(summary.loc[0, "beta_semantics"]),
            {
                "beta_mkt": "MKT_RF",
                "beta_smb": "SMB",
                "beta_hml": "HML",
                "beta_rmw": "RMW",
                "beta_cma": "CMA",
                "beta_mom": "MOM",
                "beta_lowvol": "LOWVOL",
                "beta_recovery": "RECOVERY",
            },
        )
        self.assertAlmostEqual(summary.loc[0, "beta_mkt"], 1.10, places=3)
        self.assertAlmostEqual(summary.loc[0, "beta_smb"], -0.25, places=3)
        self.assertAlmostEqual(summary.loc[0, "beta_hml"], 0.35, places=3)
        self.assertTrue(np.isnan(summary.loc[0, "beta_mom"]))

        self.assertListEqual(
            list(loadings.columns),
            ["strategy", "market", "model", "factor_source", "proxy_only", "factor", "beta", "t_stat", "p_value"],
        )
        self.assertEqual(set(loadings["factor"]), {"MKT_RF", "SMB", "HML", "RMW", "CMA"})
        self.assertEqual(len(loadings), 5)
        pd.testing.assert_frame_equal(summary, exported_summary, check_dtype=False)
        pd.testing.assert_frame_equal(loadings, exported_loadings, check_dtype=False)

    def test_attribute_strategies_uses_proxy_model_for_cn_runs(self):
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        prices = self._make_price_frame(dates, benchmark="000300.SS")
        returns = prices["000300.SS"].pct_change().fillna(0.0) * 0.7 + 0.0002
        results = pd.DataFrame(
            {
                "Strategy": 100_000.0 * (1.0 + returns).cumprod(),
                "CSI 300 (Benchmark)": 100_000.0 * (1.0 + prices["000300.SS"].pct_change().fillna(0.0)).cumprod(),
            },
            index=dates,
        )

        summary, loadings = attribute_strategies(
            results_df=results,
            benchmark_label="CSI 300 (Benchmark)",
            benchmark="000300.SS",
            price_data=prices,
            market="cn",
            model_selection="ff5",
            external_factors=None,
        )

        self.assertEqual(len(summary), 1)
        self.assertEqual(summary.loc[0, "model"], "proxy")
        self.assertEqual(summary.loc[0, "factor_source"], "proxy_only")
        self.assertTrue(bool(summary.loc[0, "proxy_only"]))
        self.assertEqual(
            json.loads(summary.loc[0, "beta_semantics"]),
            {
                "beta_mkt": "MKT",
                "beta_smb": "SMB_PROXY",
                "beta_hml": "HML_PROXY",
                "beta_rmw": "RMW_PROXY",
                "beta_cma": "CMA_PROXY",
                "beta_mom": "MOM",
                "beta_lowvol": "LOWVOL",
                "beta_recovery": "RECOVERY",
            },
        )
        self.assertNotIn("beta_smb_proxy", summary.columns)
        self.assertNotIn("beta_hml_proxy", summary.columns)
        self.assertNotIn("beta_rmw_proxy", summary.columns)
        self.assertNotIn("beta_cma_proxy", summary.columns)
        self.assertFalse(np.isnan(summary.loc[0, "beta_smb"]))
        self.assertFalse(np.isnan(summary.loc[0, "beta_hml"]))
        self.assertFalse(np.isnan(summary.loc[0, "beta_rmw"]))
        self.assertFalse(np.isnan(summary.loc[0, "beta_cma"]))
        self.assertEqual(
            set(loadings["factor"]),
            {"MKT", "SMB_PROXY", "HML_PROXY", "RMW_PROXY", "CMA_PROXY", "MOM", "LOWVOL", "RECOVERY"},
        )
        loadings_by_factor = loadings.set_index("factor")["beta"]
        semantics = json.loads(summary.loc[0, "beta_semantics"])
        self.assertAlmostEqual(summary.loc[0, "beta_mkt"], loadings_by_factor[semantics["beta_mkt"]], places=10)
        self.assertAlmostEqual(summary.loc[0, "beta_smb"], loadings_by_factor[semantics["beta_smb"]], places=10)
        self.assertAlmostEqual(summary.loc[0, "beta_hml"], loadings_by_factor[semantics["beta_hml"]], places=10)
        self.assertAlmostEqual(summary.loc[0, "beta_rmw"], loadings_by_factor[semantics["beta_rmw"]], places=10)
        self.assertAlmostEqual(summary.loc[0, "beta_cma"], loadings_by_factor[semantics["beta_cma"]], places=10)

    def test_attribute_strategies_without_benchmark_uses_equal_weight_proxy_market(self):
        dates = pd.date_range("2025-01-01", periods=320, freq="B")
        prices = self._make_price_frame(dates, benchmark="000300.SS").drop(columns=["000300.SS"])
        equal_weight_returns = prices.pct_change().mean(axis=1).fillna(0.0)
        results = pd.DataFrame(
            {
                "Strategy": 100_000.0 * (1.0 + 0.0002 + 0.8 * equal_weight_returns).cumprod(),
                "External Benchmark": 100_000.0 * (1.0 + 0.0001 + 0.6 * equal_weight_returns).cumprod(),
            },
            index=dates,
        )

        summary_missing, loadings_missing = attribute_strategies(
            results_df=results,
            benchmark_label="External Benchmark",
            benchmark=None,
            price_data=prices,
            market="cn",
            model_selection="ff5",
            external_factors=None,
        )
        summary_explicit, loadings_explicit = attribute_strategies(
            results_df=results,
            benchmark_label="External Benchmark",
            benchmark="MISSING_BENCHMARK",
            price_data=prices,
            market="cn",
            model_selection="ff5",
            external_factors=None,
        )

        pd.testing.assert_frame_equal(summary_missing, summary_explicit, check_dtype=False)
        pd.testing.assert_frame_equal(loadings_missing, loadings_explicit, check_dtype=False)

    def test_print_attribution_summary_prints_compact_table_and_interpretation(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Factor attribution", output)
        self.assertIn("Strategy", output)
        self.assertIn("ff5", output)
        self.assertIn("alpha_ann", output)
        self.assertIn("Interpretation", output)

    def test_print_attribution_summary_keeps_proxy_factor_labels_in_output(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "MKT",
                            "beta_smb": "SMB_PROXY",
                            "beta_hml": "HML_PROXY",
                            "beta_rmw": "RMW_PROXY",
                            "beta_cma": "CMA_PROXY",
                            "beta_mom": "MOM",
                            "beta_lowvol": "LOWVOL",
                            "beta_recovery": "RECOVERY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("beta_smb_proxy", output)
        self.assertIn("beta_hml_proxy", output)
        self.assertIn("SMB_PROXY", output)
        self.assertNotIn(" beta_smb ", output)

    def test_print_attribution_summary_ignores_malformed_proxy_beta_semantics(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": "{not-json",
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Proxy factor attribution", output)
        self.assertIn("beta_smb_proxy", output)
        self.assertIn("SMB_PROXY", output)
        self.assertNotIn(" beta_smb ", output)

    def test_print_attribution_summary_honors_complete_noncanonical_beta_semantics(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "US Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "MARKET_EXCESS",
                            "beta_smb": "SIZE",
                            "beta_hml": "VALUE",
                            "beta_rmw": "QUALITY",
                            "beta_cma": "INVESTMENT",
                            "beta_mom": "MOMENTUM",
                            "beta_lowvol": "MINVOL",
                            "beta_recovery": "BOUNCE",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
                {
                    "strategy": "CN Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "LOCAL_MARKET",
                            "beta_smb": "SIZE_PROXY_CUSTOM",
                            "beta_hml": "VALUE_PROXY_CUSTOM",
                            "beta_rmw": "QUALITY_PROXY_CUSTOM",
                            "beta_cma": "INVEST_PROXY_CUSTOM",
                            "beta_mom": "TREND",
                            "beta_lowvol": "DEFENSIVE",
                            "beta_recovery": "RECOVERY_PROXY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Standard factor attribution", output)
        self.assertIn("Proxy factor attribution", output)
        self.assertIn("beta_market_excess", output)
        self.assertIn("beta_size_proxy_custom", output)
        self.assertIn("MARKET_EXCESS 1.05", output)
        self.assertIn("SIZE_PROXY_CUSTOM -0.30", output)
        self.assertNotIn("MKT_RF 1.05", output)
        self.assertNotIn("SMB_PROXY -0.30", output)

    def test_print_attribution_summary_ignores_duplicate_beta_semantics_labels(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "US Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "DUPLICATE",
                            "beta_smb": "DUPLICATE",
                            "beta_hml": "VALUE",
                            "beta_rmw": "QUALITY",
                            "beta_cma": "INVESTMENT",
                            "beta_mom": "MOMENTUM",
                            "beta_lowvol": "MINVOL",
                            "beta_recovery": "BOUNCE",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("beta_smb", output)
        self.assertIn("MKT_RF 1.05", output)
        self.assertNotIn("beta_duplicate", output)
        self.assertNotIn("DUPLICATE 1.05", output)

    def test_print_attribution_summary_ignores_colliding_normalized_beta_semantics_labels(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "CN Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "A-B",
                            "beta_smb": "A B",
                            "beta_hml": "VALUE_PROXY_CUSTOM",
                            "beta_rmw": "QUALITY_PROXY_CUSTOM",
                            "beta_cma": "INVEST_PROXY_CUSTOM",
                            "beta_mom": "TREND",
                            "beta_lowvol": "DEFENSIVE",
                            "beta_recovery": "RECOVERY_PROXY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("beta_smb_proxy", output)
        self.assertIn("SMB_PROXY -0.30", output)
        self.assertNotIn("beta_a_b", output)
        self.assertNotIn("A B -0.30", output)

    def test_print_attribution_summary_ignores_punctuation_only_beta_semantics_labels(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "CN Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "!!!",
                            "beta_smb": "SIZE_PROXY_CUSTOM",
                            "beta_hml": "VALUE_PROXY_CUSTOM",
                            "beta_rmw": "QUALITY_PROXY_CUSTOM",
                            "beta_cma": "INVEST_PROXY_CUSTOM",
                            "beta_mom": "TREND",
                            "beta_lowvol": "DEFENSIVE",
                            "beta_recovery": "RECOVERY_PROXY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("beta_smb_proxy", output)
        self.assertIn("SMB_PROXY -0.30", output)
        self.assertIn("MKT 0.85", output)
        self.assertNotIn("beta_size_proxy_custom", output)
        self.assertNotIn("!!! 0.85", output)

    def test_print_attribution_summary_ignores_labels_that_normalize_to_empty_suffix(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "US Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "___",
                            "beta_smb": "SIZE",
                            "beta_hml": "VALUE",
                            "beta_rmw": "QUALITY",
                            "beta_cma": "INVESTMENT",
                            "beta_mom": "MOMENTUM",
                            "beta_lowvol": "MINVOL",
                            "beta_recovery": "BOUNCE",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                }
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("beta_smb", output)
        self.assertIn("MKT_RF 1.05", output)
        self.assertNotIn("beta_size", output)
        self.assertNotIn("___ 1.05", output)

    def test_print_attribution_summary_splits_standard_and_proxy_sections_for_mixed_frames(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "US Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "MKT_RF",
                            "beta_smb": "SMB",
                            "beta_hml": "HML",
                            "beta_rmw": "RMW",
                            "beta_cma": "CMA",
                            "beta_mom": "MOM",
                            "beta_lowvol": "LOWVOL",
                            "beta_recovery": "RECOVERY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
                {
                    "strategy": "CN Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "MKT",
                            "beta_smb": "SMB_PROXY",
                            "beta_hml": "HML_PROXY",
                            "beta_rmw": "RMW_PROXY",
                            "beta_cma": "CMA_PROXY",
                            "beta_mom": "MOM",
                            "beta_lowvol": "LOWVOL",
                            "beta_recovery": "RECOVERY",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Standard factor attribution", output)
        self.assertIn("Proxy factor attribution", output)
        self.assertIn("beta_smb_proxy", output)
        self.assertIn("beta_smb ", output)

    def test_print_attribution_summary_ignores_mismatched_beta_semantics_in_mixed_frames(self):
        summary = pd.DataFrame(
            [
                {
                    "strategy": "US Strategy",
                    "market": "us",
                    "model": "ff5",
                    "factor_source": "external+local",
                    "proxy_only": False,
                    "beta_semantics": json.dumps(
                        {
                            "beta_mkt": "MKT",
                            "beta_smb": "SMB_PROXY",
                            "beta_hml": "HML_PROXY",
                            "beta_rmw": "RMW_PROXY",
                            "beta_cma": "CMA_PROXY",
                            "beta_mom": "MOM",
                            "beta_lowvol": "LOWVOL",
                            "beta_recovery": "RECOVERY",
                            "extra": "BAD",
                        }
                    ),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0004,
                    "alpha_ann": 0.1008,
                    "alpha_t_stat": 2.1,
                    "alpha_p_value": 0.04,
                    "r_squared": 0.82,
                    "adj_r_squared": 0.81,
                    "residual_vol_ann": 0.12,
                    "beta_mkt": 1.05,
                    "beta_smb": -0.20,
                    "beta_hml": 0.30,
                    "beta_rmw": 0.05,
                    "beta_cma": np.nan,
                    "beta_mom": np.nan,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
                {
                    "strategy": "CN Strategy",
                    "market": "cn",
                    "model": "proxy",
                    "factor_source": "proxy_only",
                    "proxy_only": True,
                    "beta_semantics": json.dumps({"beta_smb": "SMB", "beta_hml": "HML"}),
                    "start_date": "2025-01-02",
                    "end_date": "2026-03-24",
                    "n_obs": 319,
                    "alpha_daily": 0.0002,
                    "alpha_ann": 0.0504,
                    "alpha_t_stat": 1.5,
                    "alpha_p_value": 0.12,
                    "r_squared": 0.72,
                    "adj_r_squared": 0.70,
                    "residual_vol_ann": 0.14,
                    "beta_mkt": 0.85,
                    "beta_smb": -0.30,
                    "beta_hml": 0.25,
                    "beta_rmw": 0.10,
                    "beta_cma": -0.05,
                    "beta_mom": 0.20,
                    "beta_lowvol": np.nan,
                    "beta_recovery": np.nan,
                },
            ]
        )

        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            print_attribution_summary(summary)

        output = buffer.getvalue()
        self.assertIn("Standard factor attribution", output)
        self.assertIn("Proxy factor attribution", output)
        self.assertIn("MKT_RF 1.05", output)
        self.assertIn("SMB_PROXY -0.30", output)
        self.assertIn("beta_smb_proxy", output)
        self.assertNotIn("HML_PROXY 0.30", output)

    def _make_price_frame(self, dates: pd.DatetimeIndex, benchmark: str) -> pd.DataFrame:
        steps = np.arange(len(dates), dtype=float)
        data = {}
        for symbol, base, drift, amplitude, frequency, phase in (
            ("AAA", 45.0, 0.0005, 0.030, 19.0, 0.1),
            ("BBB", 60.0, 0.0002, 0.025, 23.0, 0.8),
            ("CCC", 75.0, -0.0001, 0.035, 17.0, 1.4),
            ("DDD", 90.0, 0.0007, 0.020, 29.0, 0.5),
            ("EEE", 55.0, -0.0002, 0.028, 31.0, 1.9),
            ("FFF", 70.0, 0.0004, 0.032, 21.0, 2.5),
        ):
            log_path = drift * steps + amplitude * np.sin(steps / frequency + phase)
            data[symbol] = base * np.exp(log_path)

        benchmark_path = 0.0004 * steps + 0.018 * np.sin(steps / 27.0 + 0.3)
        data[benchmark] = 250.0 * np.exp(benchmark_path)
        return pd.DataFrame(data, index=dates)