Guard factor regressions against unidentified models
This commit is contained in:
@@ -293,14 +293,23 @@ def run_factor_regression(
|
|||||||
|
|
||||||
x = regression_frame[factor_cols].astype(float).to_numpy()
|
x = regression_frame[factor_cols].astype(float).to_numpy()
|
||||||
x = np.column_stack([np.ones(len(regression_frame)), x])
|
x = np.column_stack([np.ones(len(regression_frame)), x])
|
||||||
|
n_obs = len(regression_frame)
|
||||||
|
param_count = x.shape[1]
|
||||||
|
if n_obs <= param_count:
|
||||||
|
raise ValueError(
|
||||||
|
f"Insufficient observations for regression: need more than {param_count} rows, got {n_obs}"
|
||||||
|
)
|
||||||
|
|
||||||
|
coefficients, _, rank, _ = np.linalg.lstsq(x, y.to_numpy(), rcond=None)
|
||||||
|
if rank < param_count:
|
||||||
|
raise ValueError(
|
||||||
|
"Regression design matrix is rank-deficient; coefficients are not uniquely identified"
|
||||||
|
)
|
||||||
|
|
||||||
coefficients, _, _, _ = np.linalg.lstsq(x, y.to_numpy(), rcond=None)
|
|
||||||
fitted = x @ coefficients
|
fitted = x @ coefficients
|
||||||
residuals = y.to_numpy() - fitted
|
residuals = y.to_numpy() - fitted
|
||||||
|
|
||||||
n_obs = len(regression_frame)
|
dof = n_obs - param_count
|
||||||
param_count = x.shape[1]
|
|
||||||
dof = max(n_obs - param_count, 1)
|
|
||||||
residual_variance = float((residuals @ residuals) / dof)
|
residual_variance = float((residuals @ residuals) / dof)
|
||||||
covariance = residual_variance * np.linalg.pinv(x.T @ x)
|
covariance = residual_variance * np.linalg.pinv(x.T @ x)
|
||||||
standard_errors = np.sqrt(np.diag(covariance))
|
standard_errors = np.sqrt(np.diag(covariance))
|
||||||
|
|||||||
@@ -461,6 +461,35 @@ class RegressionTests(unittest.TestCase):
|
|||||||
self.assertEqual(result["end_date"], "2025-02-21")
|
self.assertEqual(result["end_date"], "2025-02-21")
|
||||||
self.assertEqual(result["n_obs"], 296)
|
self.assertEqual(result["n_obs"], 296)
|
||||||
|
|
||||||
|
def test_run_factor_regression_rejects_underdetermined_designs(self):
    # Three observations against two factor columns: the regression is
    # expected to refuse the fit with an "Insufficient observations" error
    # rather than return coefficients.
    business_days = pd.date_range("2024-01-01", periods=3, freq="B")
    factor_returns = pd.DataFrame(
        {
            "MKT_RF": [0.01, -0.02, 0.015],
            "SMB": [0.005, 0.004, -0.001],
        },
        index=business_days,
    )
    strategy_returns = pd.Series([0.012, -0.018, 0.019], index=business_days)

    requested_factors = ["MKT_RF", "SMB"]
    with self.assertRaisesRegex(ValueError, "Insufficient observations"):
        run_factor_regression(
            strategy_returns,
            factor_returns,
            factor_cols=requested_factors,
        )
|
||||||
|
|
||||||
|
def test_run_factor_regression_rejects_rank_deficient_designs(self):
    # SMB is built as exactly 2x MKT_RF, so the two factor columns are
    # perfectly collinear; the regression is expected to raise a
    # "rank-deficient" error instead of reporting arbitrary coefficients.
    business_days = pd.date_range("2024-01-01", periods=6, freq="B")
    market_returns = np.array([0.01, -0.02, 0.015, 0.005, -0.01, 0.02])
    factor_returns = pd.DataFrame(
        {
            "MKT_RF": market_returns,
            "SMB": market_returns * 2.0,
        },
        index=business_days,
    )

    # Strategy returns are an exact linear blend of the (collinear) factors.
    blended_returns = (
        0.0005 + 1.0 * factor_returns["MKT_RF"] + 0.5 * factor_returns["SMB"]
    )
    strategy_returns = pd.Series(blended_returns, index=business_days)

    requested_factors = ["MKT_RF", "SMB"]
    with self.assertRaisesRegex(ValueError, "rank-deficient"):
        run_factor_regression(
            strategy_returns,
            factor_returns,
            factor_cols=requested_factors,
        )
|
||||||
|
|
||||||
def test_prepare_factor_models_uses_proxy_family_without_external_us_factors(self):
|
def test_prepare_factor_models_uses_proxy_family_without_external_us_factors(self):
|
||||||
dates = pd.date_range("2024-01-01", periods=5, freq="B")
|
dates = pd.date_range("2024-01-01", periods=5, freq="B")
|
||||||
extension = pd.DataFrame(
|
extension = pd.DataFrame(
|
||||||
|
|||||||
Reference in New Issue
Block a user