Add 28 research scripts covering DCA simulation, momentum evaluation, Sharpe optimization, trend rider analysis, and US fundamentals exploration.
323 lines
13 KiB
Python
323 lines
13 KiB
Python
"""Yearly evaluation of Permanent / TrendRider strategies vs stock pickers.
|
|
|
|
Two test cases per strategy, 2015-01-01 → 2025-12-31:
|
|
|
|
Test 1 (annual reset): each calendar year starts with $10,000.
|
|
We compute that year's compounded return and report the
|
|
end-of-year equity. Years are independent.
|
|
Test 2 (annual contribution): start with $10,000 in 2015, add
|
|
$10,000 cash on the first trading day of each subsequent year.
|
|
Report the running portfolio value at year-end (after all
|
|
contributions and that year's gains/losses).
|
|
|
|
Strategies covered:
|
|
* PermanentOverlay — Browne 25/25/25/25 + Faber MA200 stock-slot overlay
|
|
* TrendRiderV3 — risk-on/risk-off basket with regime gates
|
|
* PermanentV4 — improved Permanent (momentum baskets + bond trend)
|
|
* Recovery+Mom Top10 — current top US stock-picking strategy
|
|
|
|
Run:
|
|
uv run python -m research.permanent_yearly
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
# Allow running as a script ("python research/permanent_yearly.py") and
|
|
# as a module ("python -m research.permanent_yearly")
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
import yfinance as yf
|
|
|
|
import data_manager
|
|
from strategies.permanent import (
|
|
ETF_UNIVERSE,
|
|
GLOBAL_ETF_UNIVERSE,
|
|
HK_ETF_UNIVERSE,
|
|
PermanentOverlay,
|
|
PermanentV4,
|
|
TrendRiderV3,
|
|
)
|
|
from strategies.recovery_momentum import RecoveryMomentumStrategy
|
|
|
|
ETF_CACHE = "data/etfs.csv"
|
|
STOCKS_LONG_CACHE = "data/us_long.csv"
|
|
|
|
|
|
def load_long_stock_history(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame:
    """Return forward-filled closes for *tickers* from a long-history cache.

    Stock prices going back further than the 10-year data_manager cache:
    we need 2014 data so the 252-day momentum warmup completes before 2015.

    The panel is cached in ``STOCKS_LONG_CACHE``. The cache is reused when
    its latest date is no older than yesterday and it already holds every
    requested ticker; otherwise the tickers are downloaded from Yahoo,
    merged into the cache (cached values win on overlap) and saved.

    Args:
        tickers: symbols to load.
        start: first date to request when a download is needed.

    Returns:
        Forward-filled close prices indexed by date. Both the cache-hit and
        the download path return only the requested tickers; after a
        download, tickers that were dropped for sparse data are omitted.
    """
    cached: pd.DataFrame | None = None
    if os.path.exists(STOCKS_LONG_CACHE):
        cached = pd.read_csv(STOCKS_LONG_CACHE, index_col=0, parse_dates=True)

    if cached is not None:
        yesterday = pd.Timestamp(datetime.now().date() - timedelta(days=1))
        is_fresh = cached.index.max() >= yesterday
        has_all_tickers = all(t in cached.columns for t in tickers)
        if is_fresh and has_all_tickers:
            return cached[tickers].ffill()

    print(f"--- Downloading {len(tickers)} stock tickers (long history) from {start} ---")
    raw = yf.download(tickers, start=start, auto_adjust=True, progress=False, threads=True)
    if isinstance(raw.columns, pd.MultiIndex):
        df = raw["Close"]
    else:
        # Flat columns: a single-ticker frame, so label the close by its ticker.
        df = raw[["Close"]].rename(columns={"Close": tickers[0]})
    df = df.dropna(how="all")

    # Drop tickers with >50% missing — same convention as data_manager
    good = df.columns[df.notna().mean() > 0.5]
    df = df[good].ffill()

    if cached is not None:
        # Keep what is already cached; the download only fills the gaps.
        df = cached.combine_first(df)
    df = df.sort_index()

    # Create the directory the cache path actually points at.
    cache_dir = os.path.dirname(STOCKS_LONG_CACHE)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    df.to_csv(STOCKS_LONG_CACHE)
    print(f"--- Saved {df.shape[0]} days x {df.shape[1]} tickers to {STOCKS_LONG_CACHE} ---")

    # Match the cache-hit path: only the requested tickers, forward-filled,
    # so the result does not depend on whether a download happened.
    available = [t for t in tickers if t in df.columns]
    return df[available].ffill()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ETF data loader (separate cache so we don't pollute data/us.csv)
|
|
# ---------------------------------------------------------------------------
|
|
def load_etfs(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame:
    """Load ETF closes from local cache; download missing dates from Yahoo.

    Returns the panel WITHOUT ffill so callers can detect which dates are
    real trading days for which symbol. Caller is expected to anchor the
    panel to a master calendar (e.g. SPY) and then ffill.

    A download is triggered when there is no cache, when a requested ticker
    is missing from it, or when its latest date is more than two days old.
    Downloaded data is merged into the cache (cached values win on overlap)
    and written back to ``ETF_CACHE``.

    Args:
        tickers: ETF symbols to load.
        start: first date to request when a download is needed.

    Returns:
        Close prices indexed by date, restricted to the requested tickers,
        with all-NaN rows removed. After a download, tickers that Yahoo
        returned no column for are omitted.
    """
    cached: pd.DataFrame | None = None
    if os.path.exists(ETF_CACHE):
        cached = pd.read_csv(ETF_CACHE, index_col=0, parse_dates=True)

    need_download = (
        cached is None
        or any(t not in cached.columns for t in tickers)
        or cached.index.max() < pd.Timestamp(datetime.now() - timedelta(days=2))
    )
    if not need_download:
        return cached[tickers].dropna(how="all")

    print(f"--- Downloading ETF prices: {tickers} ---")
    raw = yf.download(tickers, start=start, auto_adjust=True, progress=False)
    if isinstance(raw.columns, pd.MultiIndex):
        df = raw["Close"]
    else:
        # Flat columns: a single-ticker frame, so label the close by its ticker.
        df = raw[["Close"]].rename(columns={"Close": tickers[0]})
    df = df.dropna(how="all")

    if cached is not None:
        # Keep what is already cached; the download only fills the gaps.
        df = cached.combine_first(df)
    df = df.sort_index()

    # Create the directory the cache path actually points at.
    cache_dir = os.path.dirname(ETF_CACHE)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    df.to_csv(ETF_CACHE)
    print(f"--- Saved {df.shape[0]} days x {df.shape[1]} ETFs to {ETF_CACHE} ---")

    # Match the cache-hit path: only the requested tickers, so the panel does
    # not pick up unrelated cached columns on runs that happen to download.
    available = [t for t in tickers if t in df.columns]
    return df[available].dropna(how="all")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Backtest engine: returns daily portfolio returns from a weights DataFrame.
|
|
# ---------------------------------------------------------------------------
|
|
def daily_returns(weights: pd.DataFrame, prices: pd.DataFrame,
                  txn_cost: float = 0.001) -> pd.Series:
    """Return the portfolio's daily returns after turnover costs.

    weights  : target weights, already lagged by one day, so weights[t] was
               decided with information through t-1 and earns the
               t-1 → t close-to-close return.
    prices   : price panel; weights are reindexed onto its dates and
               columns, with anything missing treated as a zero weight.
    txn_cost : cost charged per unit of absolute weight change.
    """
    asset_moves = prices.pct_change().fillna(0.0)
    held = weights.reindex(index=prices.index, columns=prices.columns).fillna(0.0)

    gross = asset_moves.mul(held).sum(axis=1)
    # Sum of absolute day-over-day weight changes; the first row has no
    # predecessor and contributes zero.
    traded = held.diff().abs().sum(axis=1).fillna(0.0)
    return gross - traded * txn_cost
|
|
|
|
|
|
def equity_with_cashflows(returns: pd.Series, contributions: pd.Series,
                          start_capital: float) -> pd.Series:
    """Roll an equity curve forward through daily returns and cash deposits.

    returns       : daily return series; its index defines the output dates.
    contributions : cash added per date. Each deposit lands at end-of-day,
                    after that day's return; dates absent from
                    ``returns.index`` are ignored.
    start_capital : balance before the first return is applied (the first
                    return is assumed to be 0).
    """
    deposits = contributions.reindex(returns.index).fillna(0.0)

    balance = start_capital
    path = []
    for day_return, deposit in zip(returns.values, deposits.values):
        # Grow the balance first, then add the day's cash.
        balance = balance * (1.0 + float(day_return)) + float(deposit)
        path.append(balance)

    return pd.Series(np.asarray(path, dtype=float), index=returns.index)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Yearly tests
|
|
# ---------------------------------------------------------------------------
|
|
def test1_annual_reset(returns: pd.Series, years: list[int],
                       start_capital: float = 10_000) -> pd.Series:
    """Each year independently: start at $start_capital, return year-end value.

    Args:
        returns: daily returns indexed by date.
        years: calendar years to report.
        start_capital: amount each year starts with.

    Returns:
        Series named ``year_end`` indexed by year: ``start_capital``
        compounded through that year's daily returns, or NaN for a year
        with no data.
    """
    # Extract the calendar year of every date once, not once per loop pass.
    year_of_row = returns.index.year

    out: dict[int, float] = {}
    for y in years:
        in_year = year_of_row == y
        if not in_year.any():
            out[y] = float("nan")
            continue
        growth = (1.0 + returns[in_year]).prod()
        out[y] = float(start_capital * growth)
    # dtype=float keeps the result numeric even when ``years`` is empty.
    return pd.Series(out, name="year_end", dtype=float)


# Despite the ``test`` prefix this is a report helper, not a pytest test;
# the marker stops pytest from collecting it when it is imported into a
# test module (it would fail on a missing ``returns`` fixture).
test1_annual_reset.__test__ = False
|
|
|
|
|
|
def test2_with_contributions(returns: pd.Series, years: list[int],
                             initial: float = 10_000,
                             annual_contrib: float = 10_000) -> pd.Series:
    """Start initial in year 1; add annual_contrib at first trading day of years 2+.

    Args:
        returns: daily returns indexed by date.
        years: calendar years to simulate; ``years[0]`` is the starting
            year and receives no extra contribution.
        initial: starting capital.
        annual_contrib: cash added on the first available date of every
            later year. A year with no data gets no contribution.

    Returns:
        Series named ``year_end`` indexed by year with the end-of-year
        portfolio value (NaN for a year with no data). An empty, unnamed
        float Series when ``returns`` has no dates in ``years``.
    """
    yr_returns = returns[returns.index.year.isin(years)].copy()
    if yr_returns.empty:
        return pd.Series(dtype=float)

    # The equity curve shares this index, so one year lookup serves both the
    # contribution schedule and the year-end readout.
    year_of_row = yr_returns.index.year

    contrib = pd.Series(0.0, index=yr_returns.index)
    for y in years[1:]:
        in_year = year_of_row == y
        if in_year.any():
            first_day = yr_returns.index[in_year][0]
            contrib.at[first_day] = annual_contrib

    eq = equity_with_cashflows(yr_returns, contrib, start_capital=initial)

    out: dict[int, float] = {}
    for y in years:
        in_year = year_of_row == y
        out[y] = float(eq[in_year].iloc[-1]) if in_year.any() else float("nan")
    return pd.Series(out, name="year_end")


# Despite the ``test`` prefix this is a report helper, not a pytest test;
# the marker stops pytest from collecting it when it is imported into a
# test module (it would fail on a missing ``returns`` fixture).
test2_with_contributions.__test__ = False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
def main() -> None:
    """Backtest every strategy, print both yearly reports and save them as CSV."""
    years = list(range(2015, 2026))  # 2015 .. 2025 inclusive
    history_start = "2013-06-01"
    history_end = f"{years[-1]}-12-31"

    # 1) ETF prices for the TAA strategies, global + HK variants included.
    #    Everything is anchored to the US (SPY) trading calendar so rolling
    #    windows line up across strategies. HK ETFs are reindexed and
    #    forward-filled onto NYSE dates; on HK holidays the latest HK close
    #    is used.
    etf_tickers = sorted(set(ETF_UNIVERSE + GLOBAL_ETF_UNIVERSE + HK_ETF_UNIVERSE))
    etfs = load_etfs(etf_tickers, start=history_start)
    us_calendar = etfs["SPY"].dropna().index
    etfs = etfs.reindex(us_calendar).ffill()
    etf_window = (etfs.index >= history_start) & (etfs.index <= history_end)
    etfs = etfs[etf_window]
    print(f"--- ETF panel: {etfs.shape[0]} days x {etfs.shape[1]} cols, "
          f"{etfs.index.min().date()} to {etfs.index.max().date()} ---")

    # 2) S&P 500 prices for the stock-picking strategy. This needs a longer
    #    history than data_manager's 10-year cache so the 252-day momentum
    #    warmup completes before 2015.
    from universe import UNIVERSES

    us_universe = UNIVERSES["us"]
    tickers = us_universe["fetch"]()
    benchmark = us_universe["benchmark"]
    wanted = sorted(set(tickers + [benchmark]))
    stocks = load_long_stock_history(wanted, start=history_start)
    stock_window = (stocks.index >= history_start) & (stocks.index <= history_end)
    stocks = stocks[stock_window]
    member_cols = [col for col in stocks.columns if col in tickers]
    print(f"--- Stock panel: {stocks.shape[0]} days x {len(member_cols)} members ---")

    # 3) Daily return series per strategy.
    etf_strategies = [
        ("PermanentOverlay", PermanentOverlay()),
        ("PermanentV4", PermanentV4()),
        ("TrendRiderV3-US", TrendRiderV3()),
        ("TrendRiderV3-Global",
         TrendRiderV3(risk_on=("TQQQ", "UPRO", "YINN", "CHAU"),
                      risk_off=("GLD", "DBC"))),
        ("TrendRiderV3-HK",
         TrendRiderV3(risk_on=("7200.HK", "7500.HK"),
                      risk_off=("GLD", "DBC"))),
    ]
    series: dict[str, pd.Series] = {}
    for label, strategy in etf_strategies:
        print(f"\nRunning: {label}")
        target = strategy.generate_signals(etfs)
        series[label] = daily_returns(target, etfs[target.columns])

    print("\nRunning: Recovery+Mom Top10")
    members = stocks[member_cols]
    picker = RecoveryMomentumStrategy(top_n=10)
    picks = picker.generate_signals(members)
    series["Recovery+Mom Top10"] = daily_returns(picks, stocks[member_cols])

    # Buy & hold SPY benchmark for context
    series["SPY Buy&Hold"] = etfs["SPY"].pct_change().fillna(0.0)

    # 4) Trim every series to the evaluation window (2015-01-01 onward).
    eval_start = f"{years[0]}-01-01"
    series = {
        label: s[(s.index >= eval_start) & (s.index <= history_end)]
        for label, s in series.items()
    }

    # 5) Test 1 — annual reset
    t1 = pd.DataFrame({
        label: test1_annual_reset(s, years) for label, s in series.items()
    })
    t1.index.name = "year"

    # 6) Test 2 — annual $10k contribution
    t2 = pd.DataFrame({
        label: test2_with_contributions(s, years) for label, s in series.items()
    })
    t2.index.name = "year"

    # 7) Print reports
    rule = "=" * 78

    pd.set_option("display.float_format", lambda x: f"{x:,.0f}")
    print("\n" + rule)
    print("TEST 1 — Each year starts at $10,000 (independent year-end value)")
    print(rule)
    print(t1.to_string())

    annual_ret = (t1 / 10_000.0 - 1.0) * 100
    pd.set_option("display.float_format", lambda x: f"{x:+.2f}%")
    print("\nAnnual returns (%)")
    print(annual_ret.to_string())

    avg = annual_ret.mean(axis=0)
    win_years = (annual_ret > 0).sum(axis=0)
    print("\nMean annual return / years up:")
    for c in annual_ret.columns:
        print(f" {c:22s} mean={avg[c]:+6.2f}% up_years={int(win_years[c])}/{len(years)}")

    pd.set_option("display.float_format", lambda x: f"{x:,.0f}")
    print("\n" + rule)
    print("TEST 2 — Start $10,000 in 2015, add $10,000 each subsequent year")
    print(rule)
    print(t2.to_string())

    # Cumulative cash paid in by the end of each year: $10k per year so far.
    total_in = pd.Series(
        {y: 10_000 * n for n, y in enumerate(years, start=1)},
        name="contributed",
    )
    print("\nTotal $ contributed by year-end:")
    print(total_in.to_string())

    # Total return on contributions, year-by-year
    print("\nMultiple of contributed capital:")
    pd.set_option("display.float_format", lambda x: f"{x:.2f}x")
    multiple = t2.div(total_in, axis=0)
    print(multiple.to_string())

    # 8) Save CSVs
    os.makedirs("data", exist_ok=True)
    pd.set_option("display.float_format", None)
    reset_csv = "data/permanent_yearly_test1_reset.csv"
    contrib_csv = "data/permanent_yearly_test2_contrib.csv"
    t1.to_csv(reset_csv)
    t2.to_csv(contrib_csv)
    print("\nSaved: data/permanent_yearly_test1_reset.csv")
    print("Saved: data/permanent_yearly_test2_contrib.csv")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|