quant/factor_loop.py
Gahow Wang ae25f2f6b5 Add 32 factor-combo strategies with configurable rebalancing frequency
New FactorComboStrategy class (strategies/factor_combo.py) implements
8 champion factor signals (4 US, 4 CN) discovered through iterative
factor research, each at 4 rebalancing frequencies (daily/weekly/
biweekly/monthly). Registered in trader.py as fc_{signal}_{freq}.

Existing strategies and state files are untouched — safe to git pull
and restart monitor on server.

Also includes factor research scripts (factor_loop.py, factor_research.py,
etc.) used to discover and validate these factors.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-08 10:41:34 +08:00

"""
Iterative Factor Discovery Loop.
Round 1: Academic & practitioner hypotheses (30+ factors)
Round 2: Data-driven variations on Round 1 winners
Round 3: Interaction and conditional factors
Round 4: Parameter optimization on finalists
Round 5: Best combinations
Each factor is tested immediately as a top-10 equal-weight strategy
with monthly rebalancing and 10bps transaction costs.
"""
from __future__ import annotations
import argparse
import warnings
from typing import Callable
import numpy as np
import pandas as pd
import data_manager
from universe import UNIVERSES
warnings.filterwarnings("ignore")
FactorFunc = Callable[[pd.DataFrame], pd.DataFrame]
# ---------------------------------------------------------------------------
# Backtest infrastructure
# ---------------------------------------------------------------------------
def strat(
prices: pd.DataFrame,
signal_func: FactorFunc,
top_n: int = 10,
rebal: int = 21,
warmup: int = 252,
) -> pd.DataFrame:
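    """Turn a cross-sectional signal into daily portfolio weights.

    On each rebalance date (every `rebal` trading days after `warmup` days),
    equal-weight the `top_n` highest-signal names, provided at least `top_n`
    names have a valid signal; hold until the next rebalance. Weights are
    lagged one day so day-t returns use only prior information.
    """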
sig = signal_func(prices)
rank = sig.rank(axis=1, ascending=False, na_option="bottom")
n_valid = sig.notna().sum(axis=1)
enough = n_valid >= top_n
mask = (rank <= top_n) & enough.values.reshape(-1, 1)
raw = mask.astype(float)
w = raw.div(raw.sum(axis=1).replace(0, np.nan), axis=0).fillna(0.0)
rmask = pd.Series(False, index=prices.index)
rmask.iloc[list(range(warmup, len(prices), rebal))] = True
w[~rmask] = np.nan
w = w.ffill().fillna(0.0)
w.iloc[:warmup] = 0.0
return w.shift(1).fillna(0.0)
def bt(weights: pd.DataFrame, prices: pd.DataFrame, cost: float = 0.001) -> pd.Series:
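    """Equity curve (100k base) for daily weights: portfolio return minus
    `cost` times the sum of absolute daily weight changes (turnover)."""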
ret = prices.pct_change().fillna(0.0)
pr = (weights * ret).sum(axis=1)
pr -= weights.diff().abs().sum(axis=1) * cost
return (1 + pr).cumprod() * 100000
def stats(eq: pd.Series) -> dict:
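    """Annualized CAGR/Sharpe/Sortino/MaxDD/Calmar from an equity curve,
    assuming 252 trading days; NaNs if under 200 daily returns or zero vol."""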
dr = eq.pct_change().dropna()
if len(dr) < 200 or dr.std() == 0:
return {"cagr": np.nan, "sharpe": np.nan, "sortino": np.nan,
"maxdd": np.nan, "calmar": np.nan}
ny = len(dr) / 252
tot = eq.iloc[-1] / eq.iloc[0] - 1
cagr = (1 + tot) ** (1 / ny) - 1
sh = dr.mean() / dr.std() * np.sqrt(252)
sd = dr[dr < 0].std()
so = dr.mean() / sd * np.sqrt(252) if sd > 0 else 0
rm = eq.cummax()
dd = ((eq - rm) / rm).min()
cal = cagr / abs(dd) if dd != 0 else 0
return {"cagr": cagr, "sharpe": sh, "sortino": so, "maxdd": dd, "calmar": cal}
def yearly(eq: pd.Series) -> dict[int, float]:
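    """Total return per calendar year, keyed by year."""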
dr = eq.pct_change().fillna(0)
return {y: float((1 + dr[dr.index.year == y]).prod() - 1) for y in sorted(dr.index.year.unique())}
def test_factor(name: str, func: FactorFunc, prices: pd.DataFrame,
top_n: int = 10) -> dict:
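    """Backtest one factor (signal -> weights -> equity) and return its stats
    plus the equity curve."""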
w = strat(prices, func, top_n=top_n)
eq = bt(w, prices)
s = stats(eq)
s["name"] = name
s["equity"] = eq
return s
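# Ad-hoc usage sketch (hypothetical factor; `prices` is a wide close-price
# DataFrame, one column per ticker, e.g. from data_manager.load("us")):
#   res = test_factor("mom_63d", lambda p: p.pct_change(63), prices)
#   print(f"CAGR {res['cagr']:+.1%}, Sharpe {res['sharpe']:.2f}")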
def combo(fws: list[tuple[FactorFunc, float]]) -> FactorFunc:
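    """Combine factors into one FactorFunc: weighted sum of each factor's
    cross-sectional percentile ranks."""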
def _c(p):
return sum(w * f(p).rank(axis=1, pct=True, na_option="keep") for f, w in fws)
return _c
def print_results(results: list[dict], title: str):
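    """Print a metrics table sorted by CAGR (NaN rows skipped, baseline rows
    flagged with <<<) and return it as a DataFrame."""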
df = pd.DataFrame([{k: v for k, v in r.items() if k != "equity"} for r in results])
df = df.set_index("name").sort_values("cagr", ascending=False)
print(f"\n{'='*95}")
print(f" {title}")
print(f"{'='*95}")
print(f" {'Factor':<45} {'CAGR':>7} {'Sharpe':>7} {'Sortino':>8} {'MaxDD':>7} {'Calmar':>7}")
print(f" {'-'*85}")
for name, row in df.iterrows():
flag = " <<<" if "BASELINE" in str(name) else ""
c = row['cagr']
if np.isnan(c):
continue
print(f" {str(name):<45} {c:>+6.1%} {row['sharpe']:>7.2f} {row['sortino']:>8.2f} "
f"{row['maxdd']:>+6.1%} {row['calmar']:>7.2f}{flag}")
return df
# =====================================================================
# ROUND 1 — Academic & Practitioner Hypotheses
# =====================================================================
# --- Momentum family ---
def f_mom_12_1(p): return p.shift(21).pct_change(231)
def f_mom_6_1(p): return p.shift(21).pct_change(105)
def f_mom_3_1(p): return p.shift(21).pct_change(42)
def f_mom_1_0(p): return p.pct_change(21) # 1-month (reversal in US)
# --- Recovery family ---
def f_rec_63(p): return p / p.rolling(63, min_periods=63).min() - 1
def f_rec_126(p): return p / p.rolling(126, min_periods=126).min() - 1
def f_rec_21(p): return p / p.rolling(21, min_periods=21).min() - 1
# Novy-Marx 2012-inspired intermediate momentum. Loose proxy: a ~7-month
# lookback skipping the most recent month, not the strict 7-12-month window.
def f_mom_intermediate(p): return p.shift(21).pct_change(147)  # ~7 month window
# Asness et al: quality/profitability proxy via return consistency
def f_consistent_returns(p):
ret = p.pct_change()
return (ret > 0).astype(float).rolling(252, min_periods=126).mean()
# Da, Liu, Schaumburg 2014: information discreteness
# Stocks with many small positive days > stocks with few large positive days
def f_info_discrete(p):
ret = p.pct_change()
n_pos = (ret > 0).astype(float).rolling(60, min_periods=40).sum()
sum_pos = ret.where(ret > 0, 0).rolling(60, min_periods=40).sum()
avg_pos = sum_pos / n_pos.replace(0, np.nan)
# High count of positive days + low average positive = smooth accumulation
return n_pos / avg_pos.replace(0, np.nan)
# Accumulation proxy (worked in Round 1)
def f_up_volume_proxy(p):
ret = p.pct_change()
return ret.where(ret > 0, 0).rolling(20, min_periods=15).sum()
# George & Hwang 2004: 52-week high ratio
def f_52w_high(p):
return p / p.rolling(252, min_periods=126).max()
# Frequency of large up-moves (worked in Round 1)
def f_gap_up_freq(p):
ret = p.pct_change()
return (ret > 0.01).astype(float).rolling(60, min_periods=40).mean()
# Bali, Cakici, Whitelaw 2011: MAX effect (lottery demand)
def f_anti_max(p):
ret = p.pct_change()
return -ret.rolling(20, min_periods=15).max()
# Ang et al 2006: idiosyncratic volatility (negative)
def f_neg_ivol(p):
ret = p.pct_change()
return -ret.rolling(20, min_periods=15).std()
# Blitz & van Vliet 2007: low volatility anomaly
def f_low_vol_60(p):
ret = p.pct_change()
return -ret.rolling(60, min_periods=40).std()
# Hurst exponent proxy — autocorrelation of returns
# Stocks with positive autocorrelation = trending
def f_autocorrelation(p):
ret = p.pct_change()
def _ac(x):
x = x.dropna()
if len(x) < 20:
return np.nan
return np.corrcoef(x[:-1], x[1:])[0, 1]
return ret.rolling(60, min_periods=40).apply(_ac, raw=False)
# Short-term reversal (Jegadeesh 1990)
def f_str_5d(p): return -p.pct_change(5)
def f_str_10d(p): return -p.pct_change(10)
# Earnings drift proxy (worked in Round 1)
def f_earnings_drift(p):
ret_5d = p.pct_change(5)
vol = p.pct_change().rolling(60, min_periods=40).std() * np.sqrt(5)
z = ret_5d / vol.replace(0, np.nan)
return z.rolling(60, min_periods=20).mean()
# Risk-adjusted momentum (Sharpe-momentum)
def f_sharpe_mom(p):
ret = p.pct_change()
mu = ret.rolling(252, min_periods=126).mean()
sigma = ret.rolling(252, min_periods=126).std()
return mu / sigma.replace(0, np.nan)
# Trend strength: slope of log-price regression
def f_trend_slope(p):
log_p = np.log(p.replace(0, np.nan))
def _slope(x):
x = x.dropna().values
if len(x) < 30:
return np.nan
t = np.arange(len(x), dtype=float)
t -= t.mean()
return (t * (x - x.mean())).sum() / (t * t).sum()
return log_p.rolling(60, min_periods=30).apply(_slope, raw=False)
# Acceleration: recent momentum vs. longer-term momentum
def f_mom_accel(p):
m3 = p.shift(5).pct_change(58) # ~3mo
m12 = p.shift(21).pct_change(231) # ~12mo
return m3 - m12
# Mean reversion z-score
def f_mean_rev_z(p):
ma20 = p.rolling(20, min_periods=20).mean()
vol = p.pct_change().rolling(60, min_periods=40).std() * p
return -(p - ma20) / vol.replace(0, np.nan)
# Price relative to moving averages
def f_above_ma200(p):
return p / p.rolling(200, min_periods=200).mean() - 1
def f_above_ma50(p):
return p / p.rolling(50, min_periods=50).mean() - 1
# Dual MA signal: 50-day MA / 200-day MA
def f_golden_cross(p):
ma50 = p.rolling(50, min_periods=50).mean()
ma200 = p.rolling(200, min_periods=200).mean()
return ma50 / ma200 - 1
# Drawdown recovery rate
def f_dd_recovery_rate(p):
rm = p.rolling(252, min_periods=126).max()
dd = p / rm - 1 # negative when in drawdown
return dd - dd.shift(20) # positive = recovering from drawdown
# A-share specific: short-term reversal x volatility
def f_reversal_vol(p):
return -p.pct_change(5) * p.pct_change().rolling(20, min_periods=15).std()
# Recovery + momentum (baseline)
def f_rec_mom(p):
r1 = f_rec_63(p).rank(axis=1, pct=True, na_option="keep")
r2 = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
return 0.5 * r1 + 0.5 * r2
# =====================================================================
# ROUND 2 — Second-order ideas from Round 1 analysis
# =====================================================================
# The key insight: "quality of returns" matters more than "magnitude of returns"
# Factors that measure HOW a stock goes up, not just that it went up.
# Smoothness-weighted momentum
def f_smooth_momentum(p):
"""Momentum penalized by path volatility. Stocks that go up smoothly."""
mom = p.shift(21).pct_change(231)
ret = p.pct_change()
vol = ret.rolling(252, min_periods=126).std()
return mom / (vol.replace(0, np.nan) ** 0.5) # sqrt to dampen
# Positive return ratio (like Sharpe numerator)
def f_pos_ratio_60(p):
"""Fraction of positive return days in 60 days. Quality signal."""
ret = p.pct_change()
return (ret > 0).astype(float).rolling(60, min_periods=40).mean()
# Cumulative positive returns vs cumulative negative returns
def f_up_down_asymmetry(p):
"""Ratio of cumulative up-move to cumulative down-move."""
ret = p.pct_change()
up = ret.where(ret > 0, 0).rolling(60, min_periods=40).sum()
down = (-ret.where(ret < 0, 0)).rolling(60, min_periods=40).sum()
return up / down.replace(0, np.nan)
# Streak momentum: max consecutive up days in last 40 days
def f_max_streak(p):
ret = p.pct_change()
pos = (ret > 0).astype(float)
def _max_streak(x):
x = x.dropna().values
if len(x) == 0:
return 0
best = cur = 0
for v in x:
cur = cur + 1 if v > 0.5 else 0
best = max(best, cur)
return best
return pos.rolling(40, min_periods=20).apply(_max_streak, raw=False)
# Market-relative behavior: close-only data rules out an overnight/intraday
# split, so condition close-to-close returns on market direction instead.
def f_up_capture(p):
"""Up-market capture ratio over 60 days."""
ret = p.pct_change()
mkt = ret.mean(axis=1)
up_mkt = mkt > 0
arr = ret.values.copy()
arr[~up_mkt.values, :] = np.nan
stock_up = pd.DataFrame(arr, index=ret.index, columns=ret.columns)
mkt_up_vals = mkt.where(up_mkt, np.nan)
stock_avg = stock_up.rolling(60, min_periods=20).mean()
mkt_avg = mkt_up_vals.rolling(60, min_periods=20).mean()
return stock_avg.div(mkt_avg, axis=0)
# Down-market resilience
def f_down_resilience(p):
"""How much LESS a stock falls on down-market days."""
ret = p.pct_change()
mkt = ret.mean(axis=1)
down_mkt = mkt < 0
arr = ret.values.copy()
arr[~down_mkt.values, :] = np.nan
down_ret = pd.DataFrame(arr, index=ret.index, columns=ret.columns)
return -down_ret.rolling(120, min_periods=30).mean()
# Recovery from rolling max with momentum filter
def f_rec_mom_filtered(p):
"""Recovery factor only for stocks with positive 6-month momentum.
Filters out dead-cat bounces."""
rec = p / p.rolling(126, min_periods=126).min() - 1
mom = p.shift(21).pct_change(105)
return rec.where(mom > 0, np.nan)
# Information discreteness v2: using the sign ratio
def f_sign_ratio(p):
"""Ratio of (count of positive days)^2 * avg_size to total return.
High ratio = many small ups = institutional flow."""
ret = p.pct_change()
n_total = 60
n_pos = (ret > 0).astype(float).rolling(n_total, min_periods=40).sum()
total_ret = ret.rolling(n_total, min_periods=40).sum()
sign_vol = n_pos / n_total
# Stocks where most of the return comes from many small positive days
return sign_vol * total_ret.clip(lower=0)
# =====================================================================
# ROUND 3 — Interaction & conditional factors
# =====================================================================
def f_mom_x_recovery(p):
"""Momentum × Recovery interaction. The product, not the sum."""
mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
rec_r = f_rec_63(p).rank(axis=1, pct=True, na_option="keep")
return mom_r * rec_r
def f_mom_x_upvol(p):
"""Momentum × Up-volume-proxy interaction."""
mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
up_r = f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")
return mom_r * up_r
def f_rec_deep_x_upvol(p):
"""Deep recovery × Up-volume interaction."""
rec_r = f_rec_126(p).rank(axis=1, pct=True, na_option="keep")
up_r = f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")
return rec_r * up_r
def f_trend_x_mom(p):
"""Trend strength × Momentum. Trending + momentum = double signal."""
tr_r = f_trend_slope(p).rank(axis=1, pct=True, na_option="keep")
mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
return tr_r * mom_r
def f_quality_mom(p):
"""Momentum filtered by consistency. Only persistent winners."""
mom = f_mom_12_1(p)
consist = f_consistent_returns(p)
mom_r = mom.rank(axis=1, pct=True, na_option="keep")
con_r = consist.rank(axis=1, pct=True, na_option="keep")
return 0.4 * mom_r + 0.3 * con_r + 0.3 * f_up_volume_proxy(p).rank(axis=1, pct=True, na_option="keep")
def f_rec_deep_x_gap(p):
"""Deep recovery × gap-up frequency."""
rec_r = f_rec_126(p).rank(axis=1, pct=True, na_option="keep")
gap_r = f_gap_up_freq(p).rank(axis=1, pct=True, na_option="keep")
return rec_r * gap_r
def f_mom_x_gap(p):
"""Momentum × gap-up frequency."""
mom_r = f_mom_12_1(p).rank(axis=1, pct=True, na_option="keep")
gap_r = f_gap_up_freq(p).rank(axis=1, pct=True, na_option="keep")
return mom_r * gap_r
# Regime-conditional: momentum with volatility filter
def f_mom_low_vol_regime(p):
"""Momentum only when market vol is below median.
Momentum crashes in high-vol regimes."""
mom = f_mom_12_1(p)
mkt_vol = p.pct_change().mean(axis=1).rolling(60).std()
vol_median = mkt_vol.rolling(252, min_periods=126).median()
low_vol = mkt_vol <= vol_median
mask = pd.DataFrame(
np.tile(low_vol.values[:, None], (1, mom.shape[1])),
index=mom.index, columns=mom.columns,
)
return mom.where(mask, 0)
# =====================================================================
# Main loop
# =====================================================================
def run_round(
name: str,
factors: list[tuple[str, FactorFunc]],
prices: pd.DataFrame,
top_n: int = 10,
) -> list[dict]:
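    """Backtest each named factor on `prices` and print the round's table."""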
results = []
for fname, func in factors:
r = test_factor(fname, func, prices, top_n=top_n)
results.append(r)
print_results(results, name)
return results
def run_market(market: str):
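    """Run all five discovery rounds for one market ("us" or "cn")."""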
config = UNIVERSES[market]
benchmark = config["benchmark"]
prices = data_manager.load(market)
bench = prices[benchmark].dropna() if benchmark in prices.columns else None
stocks = prices.drop(columns=[benchmark], errors="ignore")
print(f"\n{'#'*95}")
print(f" FACTOR DISCOVERY LOOP — {market.upper()} MARKET")
print(f" {stocks.shape[1]} stocks, {stocks.shape[0]} days, "
f"{stocks.index[0].date()}{stocks.index[-1].date()}")
print(f"{'#'*95}")
# Benchmark
if bench is not None:
eq_bench = bench / bench.iloc[0] * 100000
bs = stats(eq_bench)
print(f"\n Benchmark: CAGR {bs['cagr']:+.1%}, Sharpe {bs['sharpe']:.2f}")
# ================================================================
# ROUND 1: Academic & practitioner factors
# ================================================================
r1_factors = [
("BASELINE:rec+mom", f_rec_mom),
# Momentum family
("mom_12_1", f_mom_12_1),
("mom_6_1", f_mom_6_1),
("mom_3_1", f_mom_3_1),
("mom_1_0", f_mom_1_0),
("mom_intermediate_7m", f_mom_intermediate),
("sharpe_momentum", f_sharpe_mom),
# Recovery family
("recovery_63d", f_rec_63),
("recovery_126d", f_rec_126),
("recovery_21d", f_rec_21),
# Trend
("trend_slope_60d", f_trend_slope),
("golden_cross", f_golden_cross),
("above_ma200", f_above_ma200),
# Volatility
("low_vol_60d", f_low_vol_60),
("neg_ivol_20d", f_neg_ivol),
# Reversal
("STR_5d", f_str_5d),
("STR_10d", f_str_10d),
# Quality / accumulation
("consistent_returns", f_consistent_returns),
("up_volume_proxy", f_up_volume_proxy),
("gap_up_freq", f_gap_up_freq),
("info_discrete", f_info_discrete),
("earnings_drift", f_earnings_drift),
# Other academic
("52w_high", f_52w_high),
("anti_max_20d", f_anti_max),
("dd_recovery_rate", f_dd_recovery_rate),
("mom_acceleration", f_mom_accel),
]
if market == "cn":
r1_factors.append(("reversal_vol_cn", f_reversal_vol))
r1 = run_round("ROUND 1 — Academic & Practitioner Hypotheses", r1_factors, stocks)
# Identify top-10 from round 1
    r1_sorted = sorted(r1, key=lambda x: x["cagr"] if pd.notna(x["cagr"]) else -np.inf, reverse=True)
r1_top_names = [r["name"] for r in r1_sorted[:10] if r.get("cagr") and r["cagr"] > 0]
baseline_cagr = next((r["cagr"] for r in r1 if "BASELINE" in r["name"]), 0)
print(f"\n Baseline CAGR: {baseline_cagr:+.1%}")
print(f" Top 10: {r1_top_names}")
# ================================================================
# ROUND 2: Second-order ideas based on what worked
# ================================================================
r2_factors = [
("BASELINE:rec+mom", f_rec_mom),
("smooth_momentum", f_smooth_momentum),
("pos_ratio_60d", f_pos_ratio_60),
("up_down_asymmetry", f_up_down_asymmetry),
("max_streak_40d", f_max_streak),
("up_capture_60d", f_up_capture),
("down_resilience_120d", f_down_resilience),
("rec_mom_filtered", f_rec_mom_filtered),
("sign_ratio", f_sign_ratio),
("autocorrelation_60d", f_autocorrelation),
("mean_rev_z", f_mean_rev_z),
]
r2 = run_round("ROUND 2 — Return Quality & Behavioral Factors", r2_factors, stocks)
# ================================================================
# ROUND 3: Interaction & conditional factors
# ================================================================
r3_factors = [
("BASELINE:rec+mom", f_rec_mom),
("mom×recovery", f_mom_x_recovery),
("mom×upvol", f_mom_x_upvol),
("rec_deep×upvol", f_rec_deep_x_upvol),
("trend×mom", f_trend_x_mom),
("quality_mom", f_quality_mom),
("rec_deep×gap", f_rec_deep_x_gap),
("mom×gap", f_mom_x_gap),
("mom_low_vol_regime", f_mom_low_vol_regime),
]
r3 = run_round("ROUND 3 — Interaction & Conditional Factors", r3_factors, stocks)
# ================================================================
# Collect ALL results from all rounds
# ================================================================
all_results = r1 + r2 + r3
# Deduplicate baseline
seen = set()
unique = []
for r in all_results:
if r["name"] not in seen:
seen.add(r["name"])
unique.append(r)
    unique_sorted = sorted(unique, key=lambda x: x["cagr"] if pd.notna(x["cagr"]) else -np.inf, reverse=True)
print(f"\n{'='*95}")
print(f" ALL ROUNDS COMBINED — TOP 15 FACTORS — {market.upper()}")
print(f"{'='*95}")
print(f" {'Factor':<45} {'CAGR':>7} {'Sharpe':>7} {'Sortino':>8} {'MaxDD':>7} {'Calmar':>7}")
print(f" {'-'*85}")
for r in unique_sorted[:15]:
flag = " <<<" if "BASELINE" in r["name"] else ""
print(f" {r['name']:<45} {r['cagr']:>+6.1%} {r['sharpe']:>7.2f} {r['sortino']:>8.2f} "
f"{r['maxdd']:>+6.1%} {r['calmar']:>7.2f}{flag}")
# ================================================================
# ROUND 4: Combine top non-baseline factors
# ================================================================
top_funcs = {}
func_map_all = dict(r1_factors + r2_factors + r3_factors)
    non_baseline = [r for r in unique_sorted if "BASELINE" not in r["name"] and pd.notna(r["cagr"]) and r["cagr"]]
for r in non_baseline[:12]:
if r["name"] in func_map_all:
top_funcs[r["name"]] = func_map_all[r["name"]]
top_names = list(top_funcs.keys())
print(f"\n Building combinations from top {len(top_names)} factors: {top_names}")
combo_factors = [("BASELINE:rec+mom", f_rec_mom)]
# All pairs from top-8
for i in range(min(8, len(top_names))):
for j in range(i + 1, min(8, len(top_names))):
n1, n2 = top_names[i], top_names[j]
combo_factors.append((
f"{n1[:20]}+{n2[:20]}",
combo([(top_funcs[n1], 0.5), (top_funcs[n2], 0.5)])
))
# Triple combos from top-5
for i in range(min(5, len(top_names))):
for j in range(i + 1, min(5, len(top_names))):
for k in range(j + 1, min(5, len(top_names))):
n1, n2, n3 = top_names[i], top_names[j], top_names[k]
combo_factors.append((
f"{n1[:15]}+{n2[:15]}+{n3[:15]}",
combo([(top_funcs[n1], 0.33), (top_funcs[n2], 0.33), (top_funcs[n3], 0.34)])
))
r4 = run_round("ROUND 4 — Factor Combinations", combo_factors, stocks)
# ================================================================
# ROUND 5: Yearly breakdown of top 5 combos
# ================================================================
    r4_sorted = sorted(r4, key=lambda x: x["cagr"] if pd.notna(x["cagr"]) else -np.inf, reverse=True)
top5 = r4_sorted[:5]
# Make sure baseline is included
base = next((r for r in r4 if "BASELINE" in r["name"]), None)
if base and base not in top5:
top5.append(base)
print(f"\n{'='*95}")
print(f" ROUND 5 — YEARLY RETURNS OF BEST STRATEGIES — {market.upper()}")
print(f"{'='*95}")
cols = [(r["name"], r["equity"]) for r in top5]
if bench is not None:
eq_bench = bench / bench.iloc[0] * 100000
cols.append(("BENCHMARK", eq_bench))
# Header
header = f" {'Year':<6}"
for name, _ in cols:
header += f" | {name[:22]:>22}"
print(header)
print(" " + "-" * (6 + 25 * len(cols)))
all_years = sorted(set(y for _, eq in cols for y in eq.index.year.unique()))
for year in all_years:
line = f" {year:<6}"
for _, eq in cols:
dr = eq.pct_change().fillna(0)
yr = dr[dr.index.year == year]
r = float((1 + yr).prod() - 1) if len(yr) > 0 else 0
line += f" | {r:>+21.1%}"
print(line)
# Period CAGRs
for ny in [3, 5, 10]:
cutoff = stocks.index[-1] - pd.DateOffset(years=ny)
print(f"\n --- {ny}-year CAGR ---")
for name, eq in cols:
sl = eq[eq.index >= cutoff]
if len(sl) < 50:
continue
            tot = sl.iloc[-1] / sl.iloc[0] - 1
            yrs = (sl.index[-1] - sl.index[0]).days / 365.25  # actual span, in case history < ny years
            cagr = (1 + tot) ** (1 / yrs) - 1 if yrs > 0 else np.nan
print(f" {name[:50]:<50} {cagr:>+8.1%}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--market", default="us", choices=["us", "cn"])
args = parser.parse_args()
run_market(args.market)
if __name__ == "__main__":
main()