Files
quant/research/permanent_yearly.py
Gahow Wang 541f7bcf5b research: add strategy evaluation and exploration scripts
Add 28 research scripts covering DCA simulation, momentum evaluation,
Sharpe optimization, trend rider analysis, and US fundamentals exploration.
2026-05-14 12:54:08 +08:00

323 lines
13 KiB
Python

"""Yearly evaluation of Permanent / TrendRider strategies vs stock pickers.
Two test cases per strategy, 2015-01-01 → 2025-12-31:
Test 1 (annual reset): each calendar year starts with $10,000.
We compute that year's compounded return and report the
end-of-year equity. Years are independent.
Test 2 (annual contribution): start with $10,000 in 2015, add
$10,000 cash on the first trading day of each subsequent year.
Report the running portfolio value at year-end (after all
contributions and that year's gains/losses).
Strategies covered:
* PermanentOverlay — Browne 25/25/25/25 + Faber MA200 stock-slot overlay
* TrendRiderV3 — risk-on/risk-off basket with regime gates (run as US, Global and HK variants)
* PermanentV4 — improved Permanent (momentum baskets + bond trend)
* Recovery+Mom Top10 — current top US stock-picking strategy
* SPY Buy&Hold — benchmark, included for context
Run:
uv run python -m research.permanent_yearly
"""
from __future__ import annotations
import os
import sys
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
# Allow running as a script ("python research/permanent_yearly.py") and
# as a module ("python -m research.permanent_yearly")
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import yfinance as yf
import data_manager
from strategies.permanent import (
ETF_UNIVERSE,
GLOBAL_ETF_UNIVERSE,
HK_ETF_UNIVERSE,
PermanentOverlay,
PermanentV4,
TrendRiderV3,
)
from strategies.recovery_momentum import RecoveryMomentumStrategy
# CSV cache of ETF closes; read and written by load_etfs().
ETF_CACHE = "data/etfs.csv"
# CSV cache of long-history stock closes; read and written by
# load_long_stock_history().
STOCKS_LONG_CACHE = "data/us_long.csv"
def load_long_stock_history(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame:
    """Load long-history adjusted closes for ``tickers`` via a CSV cache.

    The 10-year data_manager cache is too short for this script: the
    252-day momentum warmup has to complete before 2015, so prices are
    kept separately in ``STOCKS_LONG_CACHE``.

    Cache policy: the cache is used as-is when its latest date is no older
    than yesterday AND it already contains every requested ticker.
    Otherwise the full history from ``start`` is downloaded again, merged
    with the cache and written back.

    Parameters
    ----------
    tickers : symbols to load.
    start   : first date requested from Yahoo when a download is needed.

    Returns
    -------
    On a cache hit: ``cached[tickers]``, forward-filled.
    After a download: the whole merged panel (all cached columns plus the
    downloaded ones). Tickers dropped for having more than 50% missing
    values may be absent, so callers should select the columns they need.
    """
    cached: pd.DataFrame | None = None
    if os.path.exists(STOCKS_LONG_CACHE):
        cached = pd.read_csv(STOCKS_LONG_CACHE, index_col=0, parse_dates=True)

    if cached is not None:
        yesterday = pd.Timestamp(datetime.now().date() - timedelta(days=1))
        fresh_today = cached.index.max() >= yesterday
        have_all_tickers = all(t in cached.columns for t in tickers)
        if fresh_today and have_all_tickers:
            return cached[tickers].ffill()

    print(f"--- Downloading {len(tickers)} stock tickers (long history) from {start} ---")
    raw = yf.download(tickers, start=start, auto_adjust=True, progress=False, threads=True)
    if isinstance(raw.columns, pd.MultiIndex):
        df = raw["Close"]
    else:
        # Flat columns: treat the lone "Close" column as the first ticker.
        df = raw[["Close"]].rename(columns={"Close": tickers[0]})
    df = df.dropna(how="all")

    # Drop tickers with >50% missing — same convention as data_manager
    good = df.columns[df.notna().mean() > 0.5]
    df = df[good]
    df = df.ffill()

    if cached is not None:
        # Fresh values take precedence; the cache only fills what the new
        # download lacks. Closes are downloaded with auto_adjust=True, so
        # history is re-based after every dividend/split. Letting stale
        # cached rows win would splice two adjustment bases together and
        # create an artificial price jump at the cache boundary.
        df = df.combine_first(cached)
    df = df.sort_index()

    os.makedirs("data", exist_ok=True)
    df.to_csv(STOCKS_LONG_CACHE)
    print(f"--- Saved {df.shape[0]} days x {df.shape[1]} tickers to {STOCKS_LONG_CACHE} ---")
    return df
# ---------------------------------------------------------------------------
# ETF data loader (separate cache so we don't pollute data/us.csv)
# ---------------------------------------------------------------------------
def load_etfs(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame:
    """Load ETF closes from the local CSV cache, refreshing it from Yahoo.

    A refresh happens when there is no cache file, when any requested
    ticker is missing from it, or when its latest date is more than two
    days old. A refresh re-downloads the full history from ``start`` (not
    just the missing dates), merges it with the cache and rewrites
    ``ETF_CACHE``.

    The panel is returned WITHOUT ffill so callers can detect which dates
    are real trading days for which symbol. The caller is expected to
    anchor the panel to a master calendar (e.g. SPY) and then ffill.

    Parameters
    ----------
    tickers : ETF symbols to load.
    start   : first date requested from Yahoo when a refresh is needed.

    Returns
    -------
    On a cache hit: ``cached[tickers]`` with all-NaN rows removed.
    After a refresh: the whole merged panel, which may contain cached
    columns beyond ``tickers``.
    """
    cached: pd.DataFrame | None = None
    if os.path.exists(ETF_CACHE):
        cached = pd.read_csv(ETF_CACHE, index_col=0, parse_dates=True)

    need_download = (
        cached is None
        or any(t not in cached.columns for t in tickers)
        or cached.index.max() < pd.Timestamp(datetime.now() - timedelta(days=2))
    )
    if not need_download:
        return cached[tickers].dropna(how="all")

    print(f"--- Downloading ETF prices: {tickers} ---")
    raw = yf.download(tickers, start=start, auto_adjust=True, progress=False)
    if isinstance(raw.columns, pd.MultiIndex):
        df = raw["Close"]
    else:
        # Flat columns: treat the lone "Close" column as the first ticker.
        df = raw[["Close"]].rename(columns={"Close": tickers[0]})
    df = df.dropna(how="all")

    if cached is not None:
        # Fresh values take precedence; the cache only fills what the new
        # download lacks. Closes are downloaded with auto_adjust=True, so
        # history is re-based after every distribution/split. Letting
        # stale cached rows win would splice two adjustment bases together
        # and create an artificial price jump at the cache boundary.
        df = df.combine_first(cached)
    df = df.sort_index()

    os.makedirs("data", exist_ok=True)
    df.to_csv(ETF_CACHE)
    print(f"--- Saved {df.shape[0]} days x {df.shape[1]} ETFs to {ETF_CACHE} ---")
    return df
# ---------------------------------------------------------------------------
# Backtest engine: returns daily portfolio returns from a weights DataFrame.
# ---------------------------------------------------------------------------
def daily_returns(weights: pd.DataFrame, prices: pd.DataFrame,
                  txn_cost: float = 0.001) -> pd.Series:
    """Daily portfolio return series, net of a proportional turnover charge.

    weights  : target weights, expected to be lagged by one day already, so
               that the row for date t multiplies the t-1 → t close return.
    prices   : price panel; its index and columns define the output grid.
               Weights are reindexed onto it and missing cells count as 0.
    txn_cost : cost charged per unit of absolute weight change.

    The first row has no previous weights to diff against, so it carries
    no turnover charge.
    """
    held = weights.reindex(index=prices.index, columns=prices.columns)
    held = held.fillna(0.0)

    asset_returns = prices.pct_change().fillna(0.0)
    gross = asset_returns.mul(held).sum(axis=1)

    traded = held.diff().abs().sum(axis=1).fillna(0.0)
    cost = traded * txn_cost
    return gross - cost
def equity_with_cashflows(returns: pd.Series, contributions: pd.Series,
                          start_capital: float) -> pd.Series:
    """Roll an equity curve forward through daily returns and cash deposits.

    returns       : daily return series; its index defines the output index.
    contributions : cash added per date. It is reindexed onto
                    ``returns.index`` with gaps treated as 0; each deposit
                    lands at end-of-day, after that day's return.
    start_capital : balance before the first return is applied (the first
                    return is assumed to be 0).

    Returns the end-of-day balance for every date in ``returns``.
    """
    deposits = contributions.reindex(returns.index).fillna(0.0)
    path = np.empty(len(returns))
    balance = start_capital
    for pos, (ret, cash) in enumerate(zip(returns.values, deposits.values)):
        # Grow yesterday's balance first, then add today's deposit.
        balance = balance * (1.0 + float(ret)) + float(cash)
        path[pos] = balance
    return pd.Series(path, index=returns.index)
# ---------------------------------------------------------------------------
# Yearly tests
# ---------------------------------------------------------------------------
def test1_annual_reset(returns: pd.Series, years: list[int],
start_capital: float = 10_000) -> pd.Series:
"""Each year independently: start at $start_capital, return year-end value."""
out: dict[int, float] = {}
for y in years:
mask = returns.index.year == y
if not mask.any():
out[y] = float("nan")
continue
cum = (1.0 + returns[mask]).prod()
out[y] = float(start_capital * cum)
return pd.Series(out, name="year_end")
def test2_with_contributions(returns: pd.Series, years: list[int],
                             initial: float = 10_000,
                             annual_contrib: float = 10_000) -> pd.Series:
    """Year-end portfolio value with a yearly cash top-up.

    The portfolio starts with ``initial``. For every year after the first
    entry of ``years``, ``annual_contrib`` is deposited on that year's
    first date present in ``returns``. Only returns dated in ``years`` are
    used.

    Returns a Series named "year_end", indexed by year, holding the last
    equity value of each year (NaN for a year without data). If no return
    falls in ``years`` at all, an empty float Series is returned.
    """
    window = returns[returns.index.year.isin(years)].copy()
    if window.empty:
        return pd.Series(dtype=float)

    # Schedule one deposit on the first available date of each later year.
    window_years = window.index.year
    cash_in = pd.Series(0.0, index=window.index)
    for year in years[1:]:
        days = window.index[window_years == year]
        if len(days) > 0:
            cash_in.at[days[0]] = annual_contrib

    equity = equity_with_cashflows(window, cash_in, start_capital=initial)

    equity_years = equity.index.year
    year_end: dict[int, float] = {}
    for year in years:
        in_year = equity[equity_years == year]
        year_end[year] = float(in_year.iloc[-1]) if len(in_year) > 0 else float("nan")
    return pd.Series(year_end, name="year_end")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Run both yearly tests for every strategy, print reports, save CSVs.

    Steps: load the ETF and stock price panels, turn each strategy's
    weights into a daily return series, restrict the series to the test
    years, run the annual-reset and annual-contribution tests, print the
    tables and write them to ``data/``.
    """
    years = list(range(2015, 2026))  # 2015 .. 2025 inclusive
    window_start = "2013-06-01"
    window_end = f"{years[-1]}-12-31"

    # ETF panel for the allocation strategies, covering the US, global and
    # HK universes. Rows are aligned to the dates on which SPY has a price
    # and then forward-filled, so other symbols carry their latest close
    # across dates they did not trade.
    etf_tickers = sorted(set(ETF_UNIVERSE + GLOBAL_ETF_UNIVERSE + HK_ETF_UNIVERSE))
    etfs = load_etfs(etf_tickers, start=window_start)
    us_calendar = etfs["SPY"].dropna().index
    etfs = etfs.reindex(us_calendar).ffill()
    etfs = etfs[(etfs.index >= window_start) & (etfs.index <= window_end)]
    print(f"--- ETF panel: {etfs.shape[0]} days x {etfs.shape[1]} cols, "
          f"{etfs.index.min().date()} to {etfs.index.max().date()} ---")

    # Stock panel for the stock picker. The long-history loader is used so
    # the 252-day momentum warmup is complete before 2015.
    from universe import UNIVERSES

    us_universe = UNIVERSES["us"]
    tickers = us_universe["fetch"]()
    benchmark = us_universe["benchmark"]
    download_list = sorted(set(tickers + [benchmark]))
    stocks = load_long_stock_history(download_list, start=window_start)
    stocks = stocks[(stocks.index >= window_start) & (stocks.index <= window_end)]
    member_cols = [col for col in stocks.columns if col in tickers]
    print(f"--- Stock panel: {stocks.shape[0]} days x {len(member_cols)} members ---")

    # Daily return series per strategy, keyed by display name.
    taa_strategies = [
        ("PermanentOverlay", PermanentOverlay()),
        ("PermanentV4", PermanentV4()),
        ("TrendRiderV3-US", TrendRiderV3()),
        ("TrendRiderV3-Global",
         TrendRiderV3(risk_on=("TQQQ", "UPRO", "YINN", "CHAU"),
                      risk_off=("GLD", "DBC"))),
        ("TrendRiderV3-HK",
         TrendRiderV3(risk_on=("7200.HK", "7500.HK"),
                      risk_off=("GLD", "DBC"))),
    ]
    series: dict[str, pd.Series] = {}
    for label, strategy in taa_strategies:
        print(f"\nRunning: {label}")
        weights = strategy.generate_signals(etfs)
        series[label] = daily_returns(weights, etfs[weights.columns])

    print("\nRunning: Recovery+Mom Top10")
    picker = RecoveryMomentumStrategy(top_n=10)
    picker_weights = picker.generate_signals(stocks[member_cols])
    series["Recovery+Mom Top10"] = daily_returns(picker_weights, stocks[member_cols])

    # SPY buy & hold, for context.
    series["SPY Buy&Hold"] = etfs["SPY"].pct_change().fillna(0.0)

    # Keep only the test window (first test year through the last one).
    first_day = f"{years[0]}-01-01"
    series = {
        label: rets[(rets.index >= first_day) & (rets.index <= window_end)]
        for label, rets in series.items()
    }

    # Test 1: every year restarts from $10,000.
    t1 = pd.DataFrame({label: test1_annual_reset(rets, years)
                       for label, rets in series.items()})
    t1.index.name = "year"

    # Test 2: $10,000 at the start plus $10,000 each following year.
    t2 = pd.DataFrame({label: test2_with_contributions(rets, years)
                       for label, rets in series.items()})
    t2.index.name = "year"

    # ---- Test 1 report ----
    banner = "=" * 78
    pd.set_option("display.float_format", lambda x: f"{x:,.0f}")
    print("\n" + banner)
    print("TEST 1 — Each year starts at $10,000 (independent year-end value)")
    print(banner)
    print(t1.to_string())

    annual_ret = (t1 / 10_000.0 - 1.0) * 100
    pd.set_option("display.float_format", lambda x: f"{x:+.2f}%")
    print("\nAnnual returns (%)")
    print(annual_ret.to_string())

    mean_ret = annual_ret.mean(axis=0)
    up_counts = (annual_ret > 0).sum(axis=0)
    print("\nMean annual return / years up:")
    for col in annual_ret.columns:
        print(f" {col:22s} mean={mean_ret[col]:+6.2f}% up_years={int(up_counts[col])}/{len(years)}")

    # ---- Test 2 report ----
    pd.set_option("display.float_format", lambda x: f"{x:,.0f}")
    print("\n" + banner)
    print("TEST 2 — Start $10,000 in 2015, add $10,000 each subsequent year")
    print(banner)
    print(t2.to_string())

    # Cumulative cash paid in by the end of each year.
    total_in = pd.Series(
        {year: 10_000 * count for count, year in enumerate(years, start=1)},
        name="contributed",
    )
    print("\nTotal $ contributed by year-end:")
    print(total_in.to_string())

    # Year-end value divided by the cash paid in so far.
    print("\nMultiple of contributed capital:")
    pd.set_option("display.float_format", lambda x: f"{x:.2f}x")
    multiple = t2.div(total_in, axis=0)
    print(multiple.to_string())

    # ---- Persist both tables ----
    os.makedirs("data", exist_ok=True)
    pd.set_option("display.float_format", None)
    t1.to_csv("data/permanent_yearly_test1_reset.csv")
    t2.to_csv("data/permanent_yearly_test2_contrib.csv")
    print("\nSaved: data/permanent_yearly_test1_reset.csv")
    print("Saved: data/permanent_yearly_test2_contrib.csv")


if __name__ == "__main__":
    main()