Add local attribution factor builders
This commit is contained in:
@@ -10,6 +10,7 @@ from pathlib import Path
|
||||
from urllib.error import URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
KEN_FRENCH_DAILY_FF5_ZIP_URL = (
|
||||
@@ -140,3 +141,84 @@ def load_external_us_factors(cache_dir: Path | str = "data/factors") -> pd.DataF
|
||||
|
||||
factors.to_csv(cache_path)
|
||||
return factors
|
||||
|
||||
|
||||
def _select_stock_prices(price_data: pd.DataFrame, benchmark: str) -> pd.DataFrame:
    """Return the non-benchmark price columns, index-sorted and cast to float.

    Args:
        price_data: Price table whose columns are instruments.
        benchmark: Column name to exclude; a missing column is tolerated.

    Returns:
        A new frame without the benchmark column, ordered by index, with
        every remaining column converted to ``float``.
    """
    return (
        price_data.drop(columns=[benchmark], errors="ignore")
        .sort_index()
        .astype(float)
    )
|
||||
|
||||
|
||||
def _long_short_factor(
    scores: pd.DataFrame,
    returns: pd.DataFrame,
    quantile: float = 0.3,
) -> pd.Series:
    """Return the row-wise spread between high-score and low-score returns.

    Scores are shifted down by one row before ranking, so each row of
    ``returns`` is bucketed using the previous row's scores.

    Args:
        scores: Per-row, per-column ranking signal.
        returns: Returns laid out like ``scores``.
        quantile: Tail fraction used for each leg; the long leg takes scores
            at or above the ``1 - quantile`` row quantile, the short leg
            takes scores at or below the ``quantile`` row quantile.

    Returns:
        An unnamed series holding mean long-leg return minus mean short-leg
        return for every row (NaN where either leg is empty).
    """
    prior_scores = scores.shift(1)

    def _leg_mean(members: pd.DataFrame) -> pd.Series:
        # Average only the returns whose column belongs to the leg on that row.
        return returns.where(members).mean(axis=1)

    upper_bound = prior_scores.quantile(1 - quantile, axis=1)
    lower_bound = prior_scores.quantile(quantile, axis=1)

    top_leg = _leg_mean(prior_scores.ge(upper_bound, axis=0))
    bottom_leg = _leg_mean(prior_scores.le(lower_bound, axis=0))

    spread = top_leg - bottom_leg
    return spread.rename(None)
|
||||
|
||||
|
||||
def build_extension_factors(
    price_data: pd.DataFrame,
    benchmark: str,
    market: str,
) -> pd.DataFrame:
    """Build momentum, low-volatility and recovery long-short factors from prices.

    Args:
        price_data: Price table with one column per instrument; its index is
            reused as the index of the result.
        benchmark: Column excluded from the stock universe (ignored when the
            column is absent).
        market: Accepted but unused by this builder.

    Returns:
        A frame indexed like ``price_data`` with columns ``MOM``, ``LOWVOL``
        and ``RECOVERY``, each a long-short return series.
    """
    # The parameter is part of the signature only; drop it explicitly.
    del market

    stocks = _select_stock_prices(price_data, benchmark)

    # fill_method=None keeps gaps as NaN. The legacy implicit forward-fill
    # turned missing prices into zero returns (and zero volatility), and its
    # default differs between pandas versions.
    returns = stocks.pct_change(fill_method=None)

    # Price change over 231 rows, measured up to 21 rows before the current row.
    momentum_scores = stocks.shift(21).pct_change(231, fill_method=None)
    # Negated so that the least volatile series receive the highest score.
    low_vol_scores = -returns.rolling(60, min_periods=60).std()
    # Relative distance of the current price above its 63-row low.
    recovery_scores = stocks / stocks.rolling(63, min_periods=63).min() - 1.0

    return pd.DataFrame(
        {
            "MOM": _long_short_factor(momentum_scores, returns),
            "LOWVOL": _long_short_factor(low_vol_scores, returns),
            "RECOVERY": _long_short_factor(recovery_scores, returns),
        },
        index=price_data.index,
    )
|
||||
|
||||
|
||||
def _positive_share(values: np.ndarray) -> float:
    """Return the fraction of entries in ``values`` that are strictly positive."""
    is_positive = values > 0
    share = is_positive.mean()
    return float(share)
|
||||
|
||||
|
||||
def build_proxy_core_factors(
    price_data: pd.DataFrame,
    benchmark: str,
    market: str,
) -> pd.DataFrame:
    """Build price-only proxies for a market factor and four long-short factors.

    Args:
        price_data: Price table with one column per instrument; its index is
            reused as the index of the result.
        benchmark: Column whose returns form the ``MKT`` series. When the
            column is absent, the equal-weighted mean stock return is used.
        market: Accepted but unused by this builder.

    Returns:
        A frame indexed like ``price_data`` with columns ``MKT``,
        ``SMB_PROXY``, ``HML_PROXY``, ``RMW_PROXY`` and ``CMA_PROXY``.
    """
    # The parameter is part of the signature only; drop it explicitly.
    del market

    stocks = _select_stock_prices(price_data, benchmark)

    # fill_method=None keeps gaps as NaN instead of relying on the legacy
    # implicit forward-fill, whose default differs between pandas versions.
    returns = stocks.pct_change(fill_method=None)

    if benchmark in price_data:
        # Sort first so benchmark returns follow the same row order as the
        # index-sorted stock prices; the result is re-aligned to the caller's
        # index when the output frame is assembled.
        benchmark_prices = price_data[benchmark].sort_index().astype(float)
        market_factor = benchmark_prices.pct_change(fill_method=None)
    else:
        market_factor = returns.mean(axis=1)

    # Lower price level -> higher score.
    inverse_price_scores = -stocks
    # Closer to the 252-row low -> higher score.
    value_proxy_scores = -(stocks / stocks.rolling(252, min_periods=252).min() - 1.0)
    # Share of strictly positive returns over the last 63 rows.
    profitability_proxy_scores = returns.rolling(63, min_periods=63).apply(
        _positive_share, raw=True
    )
    # Smaller 126-row price change -> higher score.
    investment_proxy_scores = -stocks.pct_change(126, fill_method=None)

    return pd.DataFrame(
        {
            "MKT": market_factor,
            "SMB_PROXY": _long_short_factor(inverse_price_scores, returns),
            "HML_PROXY": _long_short_factor(value_proxy_scores, returns),
            "RMW_PROXY": _long_short_factor(profitability_proxy_scores, returns),
            "CMA_PROXY": _long_short_factor(investment_proxy_scores, returns),
        },
        index=price_data.index,
    )
|
||||
|
||||
Reference in New Issue
Block a user