Add 32 factor-combo strategies with configurable rebalancing frequency
New FactorComboStrategy class (strategies/factor_combo.py) implements
8 champion factor signals (4 US, 4 CN) discovered through iterative
factor research, each at 4 rebalancing frequencies (daily/weekly/
biweekly/monthly). Registered in trader.py as fc_{signal}_{freq}.
Existing strategies and state files are untouched — safe to git pull
and restart monitor on server.
Also includes factor research scripts (factor_loop.py, factor_research.py,
etc.) used to discover and validate these factors.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
factor_loop.py — 654 lines, new file
@@ -0,0 +1,654 @@
|
||||
"""
|
||||
Iterative Factor Discovery Loop.
|
||||
|
||||
Round 1: Academic & practitioner hypotheses (30+ factors)
|
||||
Round 2: Data-driven variations on Round 1 winners
|
||||
Round 3: Interaction and conditional factors
|
||||
Round 4: Parameter optimization on finalists
|
||||
Round 5: Best combinations
|
||||
|
||||
Each factor is tested immediately as a top-10 equal-weight strategy
|
||||
with monthly rebalancing and 10bps transaction costs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import warnings
|
||||
from typing import Callable
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import data_manager
|
||||
from universe import UNIVERSES
|
||||
|
||||
# Blanket-suppress warnings for the whole run — the rolling/apply and
# NaN-heavy factor math below would otherwise flood stdout.
warnings.filterwarnings("ignore")

# A factor: price panel (dates × tickers) in, same-shaped score panel out.
FactorFunc = Callable[[pd.DataFrame], pd.DataFrame]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backtest infrastructure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def strat(
    prices: pd.DataFrame,
    signal_func: FactorFunc,
    top_n: int = 10,
    rebal: int = 21,
    warmup: int = 252,
) -> pd.DataFrame:
    """Turn a cross-sectional signal into lagged equal-weight top-N weights.

    Each day the signal is ranked across tickers.  On rebalance days
    (every ``rebal`` rows, starting after ``warmup``) the top ``top_n``
    names get equal weight, which is then held until the next rebalance.
    Days where fewer than ``top_n`` names have a valid signal get zero
    weight.  The result is shifted one day so no look-ahead is possible.
    """
    scores = signal_func(prices)
    ranks = scores.rank(axis=1, ascending=False, na_option="bottom")
    # Only trade when the cross-section is wide enough.
    has_enough = scores.notna().sum(axis=1) >= top_n
    selected = (ranks <= top_n) & has_enough.to_numpy()[:, None]
    picks = selected.astype(float)
    weights = picks.div(picks.sum(axis=1).replace(0, np.nan), axis=0).fillna(0.0)
    # Keep weights only on rebalance rows, then carry them forward.
    is_rebal = pd.Series(False, index=prices.index)
    is_rebal.iloc[warmup::rebal] = True
    weights[~is_rebal] = np.nan
    weights = weights.ffill().fillna(0.0)
    weights.iloc[:warmup] = 0.0
    # Lag by one day: today's weights earn tomorrow's return.
    return weights.shift(1).fillna(0.0)
|
||||
|
||||
|
||||
def bt(weights: pd.DataFrame, prices: pd.DataFrame, cost: float = 0.001) -> pd.Series:
    """Backtest daily weights on close-to-close returns with linear costs.

    Turnover is charged at ``cost`` per unit of absolute weight change.
    Returns the equity curve starting from 100,000.
    """
    daily = prices.pct_change().fillna(0.0)
    gross = (weights * daily).sum(axis=1)
    turnover = weights.diff().abs().sum(axis=1)
    net = gross - turnover * cost
    return (1.0 + net).cumprod() * 100000
|
||||
|
||||
|
||||
def stats(eq: pd.Series) -> dict:
    """Summary stats (CAGR, Sharpe, Sortino, MaxDD, Calmar) for an equity curve.

    Returns all-NaN stats when the curve is too short (<200 daily returns)
    or completely flat.
    """
    daily = eq.pct_change().dropna()
    if len(daily) < 200 or daily.std() == 0:
        return {"cagr": np.nan, "sharpe": np.nan, "sortino": np.nan,
                "maxdd": np.nan, "calmar": np.nan}
    years = len(daily) / 252
    total = eq.iloc[-1] / eq.iloc[0] - 1
    cagr = (1 + total) ** (1 / years) - 1
    sharpe = daily.mean() / daily.std() * np.sqrt(252)
    downside = daily[daily < 0].std()
    sortino = daily.mean() / downside * np.sqrt(252) if downside > 0 else 0
    peak = eq.cummax()
    maxdd = ((eq - peak) / peak).min()
    calmar = cagr / abs(maxdd) if maxdd != 0 else 0
    return {"cagr": cagr, "sharpe": sharpe, "sortino": sortino,
            "maxdd": maxdd, "calmar": calmar}
|
||||
|
||||
|
||||
def yearly(eq: pd.Series) -> dict[int, float]:
    """Compound the daily returns of *eq* into one return per calendar year."""
    daily = eq.pct_change().fillna(0)
    out: dict[int, float] = {}
    for year in sorted(daily.index.year.unique()):
        chunk = daily[daily.index.year == year]
        out[year] = float((1 + chunk).prod() - 1)
    return out
|
||||
|
||||
|
||||
def test_factor(name: str, func: FactorFunc, prices: pd.DataFrame,
                top_n: int = 10) -> dict:
    """Run one factor end-to-end and return its stats plus the equity curve."""
    weights = strat(prices, func, top_n=top_n)
    equity = bt(weights, prices)
    result = stats(equity)
    result.update(name=name, equity=equity)
    return result
|
||||
|
||||
|
||||
def combo(fws: list[tuple[FactorFunc, float]]) -> FactorFunc:
    """Blend several factors into one signal via weighted percentile ranks."""
    def blended(p):
        total = 0
        for func, weight in fws:
            total = total + weight * func(p).rank(axis=1, pct=True, na_option="keep")
        return total
    return blended
|
||||
|
||||
|
||||
def print_results(results: list[dict], title: str):
    """Print a formatted leaderboard of factor results, sorted by CAGR.

    Rows with NaN CAGR (too little history) are skipped; the BASELINE row
    is flagged with ``<<<``.  Returns the sorted DataFrame (without the
    per-factor equity curves).
    """
    # Drop the equity Series before tabulating — it doesn't fit a cell.
    df = pd.DataFrame([{k: v for k, v in r.items() if k != "equity"} for r in results])
    df = df.set_index("name").sort_values("cagr", ascending=False)
    print(f"\n{'='*95}")
    print(f" {title}")
    print(f"{'='*95}")
    print(f" {'Factor':<45} {'CAGR':>7} {'Sharpe':>7} {'Sortino':>8} {'MaxDD':>7} {'Calmar':>7}")
    print(f" {'-'*85}")
    for name, row in df.iterrows():
        flag = " <<<" if "BASELINE" in str(name) else ""
        c = row['cagr']
        if np.isnan(c):
            continue
        print(f" {str(name):<45} {c:>+6.1%} {row['sharpe']:>7.2f} {row['sortino']:>8.2f} "
              f"{row['maxdd']:>+6.1%} {row['calmar']:>7.2f}{flag}")
    return df
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# ROUND 1 — Academic & Practitioner Hypotheses
|
||||
# =====================================================================
|
||||
|
||||
# --- Momentum family ---
# Classic J-month-minus-1 momentum: trailing return, skipping the most
# recent month (21 trading days) to avoid short-term reversal.
def f_mom_12_1(p): return p.shift(21).pct_change(231)
def f_mom_6_1(p): return p.shift(21).pct_change(105)
def f_mom_3_1(p): return p.shift(21).pct_change(42)
def f_mom_1_0(p): return p.pct_change(21)  # 1-month (reversal in US)


# --- Recovery family ---
# Distance above the rolling N-day low: how far the stock has bounced.
def f_rec_63(p): return p / p.rolling(63, min_periods=63).min() - 1
def f_rec_126(p): return p / p.rolling(126, min_periods=126).min() - 1
def f_rec_21(p): return p / p.rolling(21, min_periods=21).min() - 1


# Novy-Marx 2012: intermediate momentum (7-12 month)
def f_mom_intermediate(p): return p.shift(21).pct_change(147)  # ~7 month


# Asness et al: quality/profitability proxy via return consistency
def f_consistent_returns(p):
    # Fraction of up days over the trailing year.
    ret = p.pct_change()
    return (ret > 0).astype(float).rolling(252, min_periods=126).mean()
|
||||
|
||||
# Da, Liu, Schaumburg 2014: information discreteness
# Stocks with many small positive days > stocks with few large positive days
def f_info_discrete(p):
    ret = p.pct_change()
    n_pos = (ret > 0).astype(float).rolling(60, min_periods=40).sum()
    sum_pos = ret.where(ret > 0, 0).rolling(60, min_periods=40).sum()
    avg_pos = sum_pos / n_pos.replace(0, np.nan)
    # High count of positive days + low average positive = smooth accumulation
    return n_pos / avg_pos.replace(0, np.nan)


# Accumulation proxy (worked in Round 1)
def f_up_volume_proxy(p):
    # Sum of positive daily returns over 20 days (no volume data available).
    ret = p.pct_change()
    return ret.where(ret > 0, 0).rolling(20, min_periods=15).sum()


# George & Hwang 2004: 52-week high ratio
def f_52w_high(p):
    # Current price as a fraction of the trailing-year high (<= 1).
    return p / p.rolling(252, min_periods=126).max()


# Frequency of large up-moves (worked in Round 1)
def f_gap_up_freq(p):
    # Share of days with a >1% close-to-close gain in the last 60 days.
    ret = p.pct_change()
    return (ret > 0.01).astype(float).rolling(60, min_periods=40).mean()
|
||||
|
||||
# Bali, Cakici, Whitelaw 2011: MAX effect (lottery demand)
def f_anti_max(p):
    # Negated largest daily gain in 20 days — penalise lottery-like names.
    ret = p.pct_change()
    return -ret.rolling(20, min_periods=15).max()


# Ang et al 2006: idiosyncratic volatility (negative)
def f_neg_ivol(p):
    # NOTE(review): total (not market-residual) volatility — a proxy only.
    ret = p.pct_change()
    return -ret.rolling(20, min_periods=15).std()


# Blitz & van Vliet 2007: low volatility anomaly
def f_low_vol_60(p):
    ret = p.pct_change()
    return -ret.rolling(60, min_periods=40).std()
|
||||
|
||||
# Hurst exponent proxy — autocorrelation of returns
# Stocks with positive autocorrelation = trending
def f_autocorrelation(p):
    ret = p.pct_change()

    def _ac(x):
        # Lag-1 autocorrelation over the window; NaN on short samples.
        x = x.dropna()
        if len(x) < 20:
            return np.nan
        return np.corrcoef(x[:-1], x[1:])[0, 1]

    return ret.rolling(60, min_periods=40).apply(_ac, raw=False)


# Short-term reversal (Jegadeesh 1990)
def f_str_5d(p): return -p.pct_change(5)
def f_str_10d(p): return -p.pct_change(10)
|
||||
|
||||
# Earnings drift proxy (worked in Round 1)
def f_earnings_drift(p):
    # 5-day return as a z-score vs. its own 5-day-scaled vol, then smoothed —
    # persistent above-vol moves proxy for post-announcement drift.
    ret_5d = p.pct_change(5)
    vol = p.pct_change().rolling(60, min_periods=40).std() * np.sqrt(5)
    z = ret_5d / vol.replace(0, np.nan)
    return z.rolling(60, min_periods=20).mean()


# Risk-adjusted momentum (Sharpe-momentum)
def f_sharpe_mom(p):
    # Trailing-year mean daily return over its std — an unannualised Sharpe.
    ret = p.pct_change()
    mu = ret.rolling(252, min_periods=126).mean()
    sigma = ret.rolling(252, min_periods=126).std()
    return mu / sigma.replace(0, np.nan)
|
||||
|
||||
# Trend strength: slope of log-price regression
def f_trend_slope(p):
    log_p = np.log(p.replace(0, np.nan))

    def _slope(x):
        # OLS slope of log-price on time via the closed form
        # cov(t, x) / var(t), with t centred to zero mean.
        x = x.dropna().values
        if len(x) < 30:
            return np.nan
        t = np.arange(len(x), dtype=float)
        t -= t.mean()
        return (t * (x - x.mean())).sum() / (t * t).sum()

    return log_p.rolling(60, min_periods=30).apply(_slope, raw=False)
|
||||
|
||||
# Acceleration: recent momentum vs. longer-term momentum
def f_mom_accel(p):
    m3 = p.shift(5).pct_change(58)  # ~3mo
    m12 = p.shift(21).pct_change(231)  # ~12mo
    return m3 - m12


# Mean reversion z-score
def f_mean_rev_z(p):
    # Distance below the 20-day mean, scaled by price-unit volatility.
    ma20 = p.rolling(20, min_periods=20).mean()
    vol = p.pct_change().rolling(60, min_periods=40).std() * p
    return -(p - ma20) / vol.replace(0, np.nan)


# Price relative to moving averages
def f_above_ma200(p):
    return p / p.rolling(200, min_periods=200).mean() - 1


def f_above_ma50(p):
    return p / p.rolling(50, min_periods=50).mean() - 1


# Dual MA signal: 50-day MA / 200-day MA
def f_golden_cross(p):
    ma50 = p.rolling(50, min_periods=50).mean()
    ma200 = p.rolling(200, min_periods=200).mean()
    return ma50 / ma200 - 1


# Drawdown recovery rate
def f_dd_recovery_rate(p):
    rm = p.rolling(252, min_periods=126).max()
    dd = p / rm - 1  # negative when in drawdown
    return dd - dd.shift(20)  # positive = recovering from drawdown


# A-share specific: short-term reversal x volatility
def f_reversal_vol(p):
    # Reversal signal amplified for volatile names.
    return -p.pct_change(5) * p.pct_change().rolling(20, min_periods=15).std()
|
||||
|
||||
# Recovery + momentum (baseline)
def f_rec_mom(p):
    """Equal-weight blend of 63-day recovery and 12-1 momentum percentile
    ranks.  Used as the BASELINE entry in every round below."""
    r1 = f_rec_63(p).rank(axis=1, pct=True, na_option="keep")
    r2 = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
    return 0.5 * r1 + 0.5 * r2
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# ROUND 2 — Second-order ideas from Round 1 analysis
|
||||
# =====================================================================
|
||||
|
||||
# The key insight: "quality of returns" matters more than "magnitude of returns"
# Factors that measure HOW a stock goes up, not just that it went up.

# Smoothness-weighted momentum
def f_smooth_momentum(p):
    """Momentum penalized by path volatility. Stocks that go up smoothly."""
    mom = p.shift(21).pct_change(231)  # 12-1 month momentum
    ret = p.pct_change()
    vol = ret.rolling(252, min_periods=126).std()
    return mom / (vol.replace(0, np.nan) ** 0.5)  # sqrt to dampen


# Positive return ratio (like Sharpe numerator)
def f_pos_ratio_60(p):
    """Fraction of positive return days in 60 days. Quality signal."""
    ret = p.pct_change()
    return (ret > 0).astype(float).rolling(60, min_periods=40).mean()


# Cumulative positive returns vs cumulative negative returns
def f_up_down_asymmetry(p):
    """Ratio of cumulative up-move to cumulative down-move."""
    ret = p.pct_change()
    up = ret.where(ret > 0, 0).rolling(60, min_periods=40).sum()
    # Negate the down sum so the ratio is positive.
    down = (-ret.where(ret < 0, 0)).rolling(60, min_periods=40).sum()
    return up / down.replace(0, np.nan)
|
||||
|
||||
# Streak momentum: max consecutive up days in last 40 days
def f_max_streak(p):
    ret = p.pct_change()
    pos = (ret > 0).astype(float)

    def _max_streak(x):
        # Longest run of 1.0 values (up days) in the window.
        x = x.dropna().values
        if len(x) == 0:
            return 0
        best = cur = 0
        for v in x:
            cur = cur + 1 if v > 0.5 else 0
            best = max(best, cur)
        return best

    return pos.rolling(40, min_periods=20).apply(_max_streak, raw=False)
|
||||
|
||||
# Overnight proxy: gap between yesterday's close and today's pattern
# Since we only have close prices, use close-to-close 1d return decomposition
def f_up_capture(p):
    """Up-market capture ratio over 60 days."""
    ret = p.pct_change()
    mkt = ret.mean(axis=1)  # equal-weight universe return as the "market"
    up_mkt = mkt > 0
    # Blank out non-up-market days so the rolling means average over
    # up-market days only.
    arr = ret.values.copy()
    arr[~up_mkt.values, :] = np.nan
    stock_up = pd.DataFrame(arr, index=ret.index, columns=ret.columns)
    mkt_up_vals = mkt.where(up_mkt, np.nan)
    stock_avg = stock_up.rolling(60, min_periods=20).mean()
    mkt_avg = mkt_up_vals.rolling(60, min_periods=20).mean()
    # Stock's average up-day return relative to the market's.
    return stock_avg.div(mkt_avg, axis=0)
|
||||
|
||||
# Down-market resilience
def f_down_resilience(p):
    """How much LESS a stock falls on down-market days."""
    ret = p.pct_change()
    mkt = ret.mean(axis=1)  # equal-weight universe return as the "market"
    down_mkt = mkt < 0
    # Keep stock returns only on down-market days; NaN elsewhere.
    arr = ret.values.copy()
    arr[~down_mkt.values, :] = np.nan
    down_ret = pd.DataFrame(arr, index=ret.index, columns=ret.columns)
    # Negated so "falls less" scores higher.
    return -down_ret.rolling(120, min_periods=30).mean()
|
||||
|
||||
# Recovery from rolling max with momentum filter
def f_rec_mom_filtered(p):
    """Recovery factor only for stocks with positive 6-month momentum.

    Filters out dead-cat bounces.
    """
    rec = p / p.rolling(126, min_periods=126).min() - 1
    mom = p.shift(21).pct_change(105)
    # NaN (not zero) outside the filter, so filtered names drop out of the
    # cross-sectional ranking entirely.
    return rec.where(mom > 0, np.nan)
|
||||
|
||||
# Information discreteness v2: using the sign ratio
def f_sign_ratio(p):
    """Share of positive days times the (non-negative) total 60-day return.

    High score = a gain delivered through many small up days — smooth,
    'institutional flow' style accumulation rather than a few jumps.
    """
    ret = p.pct_change()
    n_total = 60
    n_pos = (ret > 0).astype(float).rolling(n_total, min_periods=40).sum()
    total_ret = ret.rolling(n_total, min_periods=40).sum()
    sign_vol = n_pos / n_total
    # Stocks where most of the return comes from many small positive days
    return sign_vol * total_ret.clip(lower=0)
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# ROUND 3 — Interaction & conditional factors
|
||||
# =====================================================================
|
||||
|
||||
def f_mom_x_recovery(p):
    """Momentum × Recovery interaction. The product, not the sum.

    Multiplying percentile ranks rewards stocks strong on BOTH factors.
    """
    mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
    rec_r = f_rec_63(p).rank(axis=1, pct=True, na_option="keep")
    return mom_r * rec_r


def f_mom_x_upvol(p):
    """Momentum × Up-volume-proxy interaction."""
    mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
    up_r = f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")
    return mom_r * up_r


def f_rec_deep_x_upvol(p):
    """Deep recovery × Up-volume interaction."""
    rec_r = f_rec_126(p).rank(axis=1, pct=True, na_option="keep")
    up_r = f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")
    return rec_r * up_r


def f_trend_x_mom(p):
    """Trend strength × Momentum. Trending + momentum = double signal."""
    tr_r = f_trend_slope(p).rank(axis=1, pct=True, na_option="keep")
    mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
    return tr_r * mom_r


def f_quality_mom(p):
    """Momentum filtered by consistency. Only persistent winners.

    A 0.4/0.3/0.3 weighted blend of momentum, consistency and up-volume
    percentile ranks (additive, unlike the product factors above).
    """
    mom = f_mom_12_1(p)
    consist = f_consistent_returns(p)
    mom_r = mom.rank(axis=1, pct=True, na_option="keep")
    con_r = consist.rank(axis=1, pct=True, na_option="keep")
    return 0.4 * mom_r + 0.3 * con_r + 0.3 * f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")


def f_rec_deep_x_gap(p):
    """Deep recovery × gap-up frequency."""
    rec_r = f_rec_126(p).rank(axis=1, pct=True, na_option="keep")
    gap_r = f_gap_up_freq(p).rank(axis=1, pct=True, na_option="keep")
    return rec_r * gap_r


def f_mom_x_gap(p):
    """Momentum × gap-up frequency."""
    mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
    gap_r = f_gap_up_freq(p).rank(axis=1, pct=True, na_option="keep")
    return mom_r * gap_r
|
||||
|
||||
# Regime-conditional: momentum with volatility filter
def f_mom_low_vol_regime(p):
    """Momentum only when market vol is below median.

    Momentum crashes in high-vol regimes.
    """
    mom = f_mom_12_1(p)
    # Equal-weight "market" return → 60-day realised vol of the universe.
    mkt_vol = p.pct_change().mean(axis=1).rolling(60).std()
    vol_median = mkt_vol.rolling(252, min_periods=126).median()
    low_vol = mkt_vol <= vol_median
    # Broadcast the per-date regime flag across all columns.
    mask = pd.DataFrame(
        np.tile(low_vol.values[:, None], (1, mom.shape[1])),
        index=mom.index, columns=mom.columns,
    )
    # Zeroed (not NaN) outside the regime, so names stay in the ranking.
    return mom.where(mask, 0)
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# Main loop
|
||||
# =====================================================================
|
||||
|
||||
def run_round(
    name: str,
    factors: list[tuple[str, FactorFunc]],
    prices: pd.DataFrame,
    top_n: int = 10,
) -> list[dict]:
    """Backtest every (name, func) pair, print the leaderboard, return results."""
    results = [test_factor(fname, func, prices, top_n=top_n)
               for fname, func in factors]
    print_results(results, name)
    return results
|
||||
|
||||
|
||||
def run_market(market: str):
    """Run the full five-round factor-discovery loop for one market.

    Loads the market's price panel via ``data_manager``, prints a
    benchmark summary, then runs: Round 1 academic factors, Round 2
    return-quality factors, Round 3 interaction factors, Round 4
    pair/triple combinations of the best non-baseline factors, and
    Round 5 yearly breakdowns plus 3/5/10-year CAGRs of the best combos.
    All output goes to stdout; nothing is returned.
    """
    config = UNIVERSES[market]
    benchmark = config["benchmark"]
    prices = data_manager.load(market)
    # Benchmark column is held separately and excluded from the stock pool.
    bench = prices[benchmark].dropna() if benchmark in prices.columns else None
    stocks = prices.drop(columns=[benchmark], errors="ignore")
    print(f"\n{'#'*95}")
    print(f" FACTOR DISCOVERY LOOP — {market.upper()} MARKET")
    print(f" {stocks.shape[1]} stocks, {stocks.shape[0]} days, "
          f"{stocks.index[0].date()} → {stocks.index[-1].date()}")
    print(f"{'#'*95}")

    # Benchmark
    if bench is not None:
        eq_bench = bench / bench.iloc[0] * 100000
        bs = stats(eq_bench)
        print(f"\n Benchmark: CAGR {bs['cagr']:+.1%}, Sharpe {bs['sharpe']:.2f}")

    # ================================================================
    # ROUND 1: Academic & practitioner factors
    # ================================================================
    r1_factors = [
        ("BASELINE:rec+mom", f_rec_mom),
        # Momentum family
        ("mom_12_1", f_mom_12_1),
        ("mom_6_1", f_mom_6_1),
        ("mom_3_1", f_mom_3_1),
        ("mom_1_0", f_mom_1_0),
        ("mom_intermediate_7m", f_mom_intermediate),
        ("sharpe_momentum", f_sharpe_mom),
        # Recovery family
        ("recovery_63d", f_rec_63),
        ("recovery_126d", f_rec_126),
        ("recovery_21d", f_rec_21),
        # Trend
        ("trend_slope_60d", f_trend_slope),
        ("golden_cross", f_golden_cross),
        ("above_ma200", f_above_ma200),
        # Volatility
        ("low_vol_60d", f_low_vol_60),
        ("neg_ivol_20d", f_neg_ivol),
        # Reversal
        ("STR_5d", f_str_5d),
        ("STR_10d", f_str_10d),
        # Quality / accumulation
        ("consistent_returns", f_consistent_returns),
        ("up_volume_proxy", f_up_volume_proxy),
        ("gap_up_freq", f_gap_up_freq),
        ("info_discrete", f_info_discrete),
        ("earnings_drift", f_earnings_drift),
        # Other academic
        ("52w_high", f_52w_high),
        ("anti_max_20d", f_anti_max),
        ("dd_recovery_rate", f_dd_recovery_rate),
        ("mom_acceleration", f_mom_accel),
    ]
    if market == "cn":
        r1_factors.append(("reversal_vol_cn", f_reversal_vol))
    r1 = run_round("ROUND 1 — Academic & Practitioner Hypotheses", r1_factors, stocks)

    # Identify top-10 from round 1
    # NOTE(review): a NaN cagr is a truthy float, so `x.get("cagr", 0) or 0`
    # does NOT neutralise NaN sort keys — ordering with NaN is undefined.
    r1_sorted = sorted(r1, key=lambda x: x.get("cagr", 0) or 0, reverse=True)
    r1_top_names = [r["name"] for r in r1_sorted[:10] if r.get("cagr") and r["cagr"] > 0]
    baseline_cagr = next((r["cagr"] for r in r1 if "BASELINE" in r["name"]), 0)
    print(f"\n Baseline CAGR: {baseline_cagr:+.1%}")
    print(f" Top 10: {r1_top_names}")

    # ================================================================
    # ROUND 2: Second-order ideas based on what worked
    # ================================================================
    r2_factors = [
        ("BASELINE:rec+mom", f_rec_mom),
        ("smooth_momentum", f_smooth_momentum),
        ("pos_ratio_60d", f_pos_ratio_60),
        ("up_down_asymmetry", f_up_down_asymmetry),
        ("max_streak_40d", f_max_streak),
        ("up_capture_60d", f_up_capture),
        ("down_resilience_120d", f_down_resilience),
        ("rec_mom_filtered", f_rec_mom_filtered),
        ("sign_ratio", f_sign_ratio),
        ("autocorrelation_60d", f_autocorrelation),
        ("mean_rev_z", f_mean_rev_z),
    ]
    r2 = run_round("ROUND 2 — Return Quality & Behavioral Factors", r2_factors, stocks)

    # ================================================================
    # ROUND 3: Interaction & conditional factors
    # ================================================================
    r3_factors = [
        ("BASELINE:rec+mom", f_rec_mom),
        ("mom×recovery", f_mom_x_recovery),
        ("mom×upvol", f_mom_x_upvol),
        ("rec_deep×upvol", f_rec_deep_x_upvol),
        ("trend×mom", f_trend_x_mom),
        ("quality_mom", f_quality_mom),
        ("rec_deep×gap", f_rec_deep_x_gap),
        ("mom×gap", f_mom_x_gap),
        ("mom_low_vol_regime", f_mom_low_vol_regime),
    ]
    r3 = run_round("ROUND 3 — Interaction & Conditional Factors", r3_factors, stocks)

    # ================================================================
    # Collect ALL results from all rounds
    # ================================================================
    all_results = r1 + r2 + r3
    # Deduplicate baseline
    seen = set()
    unique = []
    for r in all_results:
        if r["name"] not in seen:
            seen.add(r["name"])
            unique.append(r)
    unique_sorted = sorted(unique, key=lambda x: x.get("cagr", 0) or 0, reverse=True)

    print(f"\n{'='*95}")
    print(f" ALL ROUNDS COMBINED — TOP 15 FACTORS — {market.upper()}")
    print(f"{'='*95}")
    print(f" {'Factor':<45} {'CAGR':>7} {'Sharpe':>7} {'Sortino':>8} {'MaxDD':>7} {'Calmar':>7}")
    print(f" {'-'*85}")
    for r in unique_sorted[:15]:
        flag = " <<<" if "BASELINE" in r["name"] else ""
        print(f" {r['name']:<45} {r['cagr']:>+6.1%} {r['sharpe']:>7.2f} {r['sortino']:>8.2f} "
              f"{r['maxdd']:>+6.1%} {r['calmar']:>7.2f}{flag}")

    # ================================================================
    # ROUND 4: Combine top non-baseline factors
    # ================================================================
    top_funcs = {}
    # Map display names back to factor functions across all three rounds.
    func_map_all = dict(r1_factors + r2_factors + r3_factors)
    non_baseline = [r for r in unique_sorted if "BASELINE" not in r["name"] and r.get("cagr", 0)]
    for r in non_baseline[:12]:
        if r["name"] in func_map_all:
            top_funcs[r["name"]] = func_map_all[r["name"]]

    top_names = list(top_funcs.keys())
    print(f"\n Building combinations from top {len(top_names)} factors: {top_names}")

    combo_factors = [("BASELINE:rec+mom", f_rec_mom)]

    # All pairs from top-8
    for i in range(min(8, len(top_names))):
        for j in range(i + 1, min(8, len(top_names))):
            n1, n2 = top_names[i], top_names[j]
            combo_factors.append((
                f"{n1[:20]}+{n2[:20]}",
                combo([(top_funcs[n1], 0.5), (top_funcs[n2], 0.5)])
            ))

    # Triple combos from top-5
    for i in range(min(5, len(top_names))):
        for j in range(i + 1, min(5, len(top_names))):
            for k in range(j + 1, min(5, len(top_names))):
                n1, n2, n3 = top_names[i], top_names[j], top_names[k]
                combo_factors.append((
                    f"{n1[:15]}+{n2[:15]}+{n3[:15]}",
                    combo([(top_funcs[n1], 0.33), (top_funcs[n2], 0.33), (top_funcs[n3], 0.34)])
                ))

    r4 = run_round("ROUND 4 — Factor Combinations", combo_factors, stocks)

    # ================================================================
    # ROUND 5: Yearly breakdown of top 5 combos
    # ================================================================
    r4_sorted = sorted(r4, key=lambda x: x.get("cagr", 0) or 0, reverse=True)
    top5 = r4_sorted[:5]
    # Make sure baseline is included
    # NOTE(review): `base not in top5` on dicts holding pd.Series works only
    # because the duplicate is the SAME object (identity short-circuit);
    # a true equality comparison of these dicts would be fragile.
    base = next((r for r in r4 if "BASELINE" in r["name"]), None)
    if base and base not in top5:
        top5.append(base)

    print(f"\n{'='*95}")
    print(f" ROUND 5 — YEARLY RETURNS OF BEST STRATEGIES — {market.upper()}")
    print(f"{'='*95}")

    cols = [(r["name"], r["equity"]) for r in top5]
    if bench is not None:
        eq_bench = bench / bench.iloc[0] * 100000
        cols.append(("BENCHMARK", eq_bench))

    # Header
    header = f" {'Year':<6}"
    for name, _ in cols:
        header += f" | {name[:22]:>22}"
    print(header)
    print(" " + "-" * (6 + 25 * len(cols)))

    # One row per calendar year spanned by any equity curve.
    all_years = sorted(set(y for _, eq in cols for y in eq.index.year.unique()))
    for year in all_years:
        line = f" {year:<6}"
        for _, eq in cols:
            dr = eq.pct_change().fillna(0)
            yr = dr[dr.index.year == year]
            r = float((1 + yr).prod() - 1) if len(yr) > 0 else 0
            line += f" | {r:>+21.1%}"
        print(line)

    # Period CAGRs
    for ny in [3, 5, 10]:
        cutoff = stocks.index[-1] - pd.DateOffset(years=ny)
        print(f"\n --- {ny}-year CAGR ---")
        for name, eq in cols:
            sl = eq[eq.index >= cutoff]
            if len(sl) < 50:
                continue
            tot = sl.iloc[-1] / sl.iloc[0] - 1
            cagr = (1 + tot) ** (1 / ny) - 1
            print(f" {name[:50]:<50} {cagr:>+8.1%}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: choose a market and run the discovery loop."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--market", default="us", choices=["us", "cn"])
    args = parser.parse_args()
    run_market(args.market)
|
||||
|
||||
|
||||
# Script entry point: `python factor_loop.py --market us|cn`.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user