Guard factor regressions against unidentified models
This commit is contained in:
@@ -293,14 +293,23 @@ def run_factor_regression(
|
||||
|
||||
x = regression_frame[factor_cols].astype(float).to_numpy()
|
||||
x = np.column_stack([np.ones(len(regression_frame)), x])
|
||||
n_obs = len(regression_frame)
|
||||
param_count = x.shape[1]
|
||||
if n_obs <= param_count:
|
||||
raise ValueError(
|
||||
f"Insufficient observations for regression: need more than {param_count} rows, got {n_obs}"
|
||||
)
|
||||
|
||||
coefficients, _, rank, _ = np.linalg.lstsq(x, y.to_numpy(), rcond=None)
|
||||
if rank < param_count:
|
||||
raise ValueError(
|
||||
"Regression design matrix is rank-deficient; coefficients are not uniquely identified"
|
||||
)
|
||||
|
||||
coefficients, _, _, _ = np.linalg.lstsq(x, y.to_numpy(), rcond=None)
|
||||
fitted = x @ coefficients
|
||||
residuals = y.to_numpy() - fitted
|
||||
|
||||
n_obs = len(regression_frame)
|
||||
param_count = x.shape[1]
|
||||
dof = max(n_obs - param_count, 1)
|
||||
dof = n_obs - param_count
|
||||
residual_variance = float((residuals @ residuals) / dof)
|
||||
covariance = residual_variance * np.linalg.pinv(x.T @ x)
|
||||
standard_errors = np.sqrt(np.diag(covariance))
|
||||
|
||||
Reference in New Issue
Block a user