"""Yearly evaluation of Permanent / TrendRider strategies vs stock pickers. Two test cases per strategy, 2015-01-01 → 2025-12-31: Test 1 (annual reset): each calendar year starts with $10,000. We compute that year's compounded return and report the end-of-year equity. Years are independent. Test 2 (annual contribution): start with $10,000 in 2015, add $10,000 cash on the first trading day of each subsequent year. Report the running portfolio value at year-end (after all contributions and that year's gains/losses). Strategies covered: * PermanentOverlay — Browne 25/25/25/25 + Faber MA200 stock-slot overlay * TrendRiderV3 — risk-on/risk-off basket with regime gates * PermanentV4 — improved Permanent (momentum baskets + bond trend) * Recovery+Mom Top10 — current top US stock-picking strategy Run: uv run python -m research.permanent_yearly """ from __future__ import annotations import os import sys from datetime import datetime, timedelta import numpy as np import pandas as pd # Allow running as a script ("python research/permanent_yearly.py") and # as a module ("python -m research.permanent_yearly") sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import yfinance as yf import data_manager from strategies.permanent import ( ETF_UNIVERSE, GLOBAL_ETF_UNIVERSE, HK_ETF_UNIVERSE, PermanentOverlay, PermanentV4, TrendRiderV3, ) from strategies.recovery_momentum import RecoveryMomentumStrategy ETF_CACHE = "data/etfs.csv" STOCKS_LONG_CACHE = "data/us_long.csv" def load_long_stock_history(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame: """Stock prices going back further than the 10-year data_manager cache. We need 2014 data so the 252-day momentum warmup completes before 2015. Caches to data/us_long.csv. Refreshes once a day if the latest date is older than yesterday. """ cached: pd.DataFrame | None = None if os.path.exists(STOCKS_LONG_CACHE): cached = pd.read_csv(STOCKS_LONG_CACHE, index_col=0, parse_dates=True) fresh_today = ( cached is not None and cached.index.max() >= pd.Timestamp(datetime.now().date() - timedelta(days=1)) ) have_all_tickers = ( cached is not None and all(t in cached.columns for t in tickers) ) if fresh_today and have_all_tickers: return cached[tickers].ffill() print(f"--- Downloading {len(tickers)} stock tickers (long history) from {start} ---") raw = yf.download(tickers, start=start, auto_adjust=True, progress=False, threads=True) if isinstance(raw.columns, pd.MultiIndex): df = raw["Close"] else: df = raw[["Close"]].rename(columns={"Close": tickers[0]}) df = df.dropna(how="all") # Drop tickers with >50% missing — same convention as data_manager good = df.columns[df.notna().mean() > 0.5] df = df[good] df = df.ffill() if cached is not None: df = cached.combine_first(df) df = df.sort_index() os.makedirs("data", exist_ok=True) df.to_csv(STOCKS_LONG_CACHE) print(f"--- Saved {df.shape[0]} days x {df.shape[1]} tickers to {STOCKS_LONG_CACHE} ---") return df # --------------------------------------------------------------------------- # ETF data loader (separate cache so we don't pollute data/us.csv) # --------------------------------------------------------------------------- def load_etfs(tickers: list[str], start: str = "2014-01-01") -> pd.DataFrame: """Load ETF closes from local cache; download missing dates from Yahoo. Returns the panel WITHOUT ffill so callers can detect which dates are real trading days for which symbol. Caller is expected to anchor the panel to a master calendar (e.g. SPY) and then ffill. """ cached: pd.DataFrame | None = None if os.path.exists(ETF_CACHE): cached = pd.read_csv(ETF_CACHE, index_col=0, parse_dates=True) need_download = ( cached is None or any(t not in cached.columns for t in tickers) or cached.index.max() < pd.Timestamp(datetime.now() - timedelta(days=2)) ) if need_download: print(f"--- Downloading ETF prices: {tickers} ---") raw = yf.download(tickers, start=start, auto_adjust=True, progress=False) if isinstance(raw.columns, pd.MultiIndex): df = raw["Close"] else: df = raw[["Close"]].rename(columns={"Close": tickers[0]}) df = df.dropna(how="all") if cached is not None: df = cached.combine_first(df) df = df.sort_index() os.makedirs("data", exist_ok=True) df.to_csv(ETF_CACHE) print(f"--- Saved {df.shape[0]} days x {df.shape[1]} ETFs to {ETF_CACHE} ---") return df return cached[tickers].dropna(how="all") # --------------------------------------------------------------------------- # Backtest engine: returns daily portfolio returns from a weights DataFrame. # --------------------------------------------------------------------------- def daily_returns(weights: pd.DataFrame, prices: pd.DataFrame, txn_cost: float = 0.001) -> pd.Series: """Compute daily portfolio returns net of turnover cost. weights : already 1-day lagged so weights[t] is decided using info up through t-1 and applies to the t-1 → t close return. prices : aligned price data over the same columns/dates. """ aligned = weights.reindex(index=prices.index, columns=prices.columns).fillna(0.0) daily_pct = prices.pct_change().fillna(0.0) port = (daily_pct * aligned).sum(axis=1) turnover = aligned.diff().abs().sum(axis=1).fillna(0.0) return port - turnover * txn_cost def equity_with_cashflows(returns: pd.Series, contributions: pd.Series, start_capital: float) -> pd.Series: """Simulate equity given a daily return series and dated cash injections. contributions : Series indexed by dates with positive values for cash added that day (added at end-of-day, after returns). start_capital : amount on the first index date (returns[0] applies to day 1; we assume returns[0] = 0). """ contrib = contributions.reindex(returns.index).fillna(0.0) eq = np.empty(len(returns)) val = start_capital for i, r in enumerate(returns.values): val = val * (1.0 + float(r)) + float(contrib.iat[i]) eq[i] = val return pd.Series(eq, index=returns.index) # --------------------------------------------------------------------------- # Yearly tests # --------------------------------------------------------------------------- def test1_annual_reset(returns: pd.Series, years: list[int], start_capital: float = 10_000) -> pd.Series: """Each year independently: start at $start_capital, return year-end value.""" out: dict[int, float] = {} for y in years: mask = returns.index.year == y if not mask.any(): out[y] = float("nan") continue cum = (1.0 + returns[mask]).prod() out[y] = float(start_capital * cum) return pd.Series(out, name="year_end") def test2_with_contributions(returns: pd.Series, years: list[int], initial: float = 10_000, annual_contrib: float = 10_000) -> pd.Series: """Start initial in year 1; add annual_contrib at first trading day of years 2+. Returns a Series indexed by year with end-of-year portfolio value. """ yr_returns = returns[returns.index.year.isin(years)].copy() if yr_returns.empty: return pd.Series(dtype=float) contrib = pd.Series(0.0, index=yr_returns.index) for y in years[1:]: ymask = yr_returns.index.year == y if ymask.any(): first_day = yr_returns.index[ymask][0] contrib.at[first_day] = annual_contrib eq = equity_with_cashflows(yr_returns, contrib, start_capital=initial) out = {y: float(eq[eq.index.year == y].iloc[-1]) if (eq.index.year == y).any() else float("nan") for y in years} return pd.Series(out, name="year_end") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main() -> None: years = list(range(2015, 2026)) # 2015 .. 2025 inclusive # 1) ETF prices for TAA strategies — include global + HK variants too. # Anchor to the US (SPY) trading calendar so rolling windows are # consistent across strategies. HK ETFs get reindexed + ffilled onto # NYSE dates; on HK holidays we use the latest HK close. full_universe = sorted(set(ETF_UNIVERSE + GLOBAL_ETF_UNIVERSE + HK_ETF_UNIVERSE)) etfs = load_etfs(full_universe, start="2013-06-01") nyse_index = etfs["SPY"].dropna().index etfs = etfs.reindex(nyse_index).ffill() etfs = etfs[(etfs.index >= "2013-06-01") & (etfs.index <= f"{years[-1]}-12-31")] print(f"--- ETF panel: {etfs.shape[0]} days x {etfs.shape[1]} cols, " f"{etfs.index.min().date()} to {etfs.index.max().date()} ---") # 2) S&P 500 prices for stock-picking strategies — needs longer history # than data_manager's 10-year cache so that 252-day momentum warmup # completes before 2015. from universe import UNIVERSES universe = UNIVERSES["us"] tickers = universe["fetch"]() benchmark = universe["benchmark"] all_tickers = sorted(set(tickers + [benchmark])) stocks = load_long_stock_history(all_tickers, start="2013-06-01") stocks = stocks[(stocks.index >= "2013-06-01") & (stocks.index <= f"{years[-1]}-12-31")] member_cols = [c for c in stocks.columns if c in tickers] print(f"--- Stock panel: {stocks.shape[0]} days x {len(member_cols)} members ---") # 3) Build strategies and compute their daily return series series: dict[str, pd.Series] = {} for name, strat in [ ("PermanentOverlay", PermanentOverlay()), ("PermanentV4", PermanentV4()), ("TrendRiderV3-US", TrendRiderV3()), ("TrendRiderV3-Global", TrendRiderV3(risk_on=("TQQQ", "UPRO", "YINN", "CHAU"), risk_off=("GLD", "DBC"))), ("TrendRiderV3-HK", TrendRiderV3(risk_on=("7200.HK", "7500.HK"), risk_off=("GLD", "DBC"))), ]: print(f"\nRunning: {name}") w = strat.generate_signals(etfs) rets = daily_returns(w, etfs[w.columns]) series[name] = rets print("\nRunning: Recovery+Mom Top10") rec = RecoveryMomentumStrategy(top_n=10) w = rec.generate_signals(stocks[member_cols]) series["Recovery+Mom Top10"] = daily_returns(w, stocks[member_cols]) # Buy & hold SPY benchmark for context spy = etfs["SPY"] series["SPY Buy&Hold"] = spy.pct_change().fillna(0.0) # 4) Restrict every series to 2015-01-01 onward, common index per series for k, s in series.items(): series[k] = s[(s.index >= f"{years[0]}-01-01") & (s.index <= f"{years[-1]}-12-31")] # 5) Test 1 — annual reset t1 = pd.DataFrame({name: test1_annual_reset(s, years) for name, s in series.items()}) t1.index.name = "year" # 6) Test 2 — annual $10k contribution t2 = pd.DataFrame({name: test2_with_contributions(s, years) for name, s in series.items()}) t2.index.name = "year" # 7) Print reports pd.set_option("display.float_format", lambda x: f"{x:,.0f}") print("\n" + "=" * 78) print("TEST 1 — Each year starts at $10,000 (independent year-end value)") print("=" * 78) print(t1.to_string()) annual_ret = (t1 / 10_000.0 - 1.0) * 100 pd.set_option("display.float_format", lambda x: f"{x:+.2f}%") print("\nAnnual returns (%)") print(annual_ret.to_string()) avg = annual_ret.mean(axis=0) win_years = (annual_ret > 0).sum(axis=0) print("\nMean annual return / years up:") for c in annual_ret.columns: print(f" {c:22s} mean={avg[c]:+6.2f}% up_years={int(win_years[c])}/{len(years)}") pd.set_option("display.float_format", lambda x: f"{x:,.0f}") print("\n" + "=" * 78) print("TEST 2 — Start $10,000 in 2015, add $10,000 each subsequent year") print("=" * 78) print(t2.to_string()) total_in = pd.Series({y: 10_000 * (years.index(y) + 1) for y in years}, name="contributed") print("\nTotal $ contributed by year-end:") print(total_in.to_string()) # Total return on contributions, year-by-year print("\nMultiple of contributed capital:") pd.set_option("display.float_format", lambda x: f"{x:.2f}x") multiple = t2.div(total_in, axis=0) print(multiple.to_string()) # 8) Save CSVs os.makedirs("data", exist_ok=True) pd.set_option("display.float_format", None) t1.to_csv("data/permanent_yearly_test1_reset.csv") t2.to_csv("data/permanent_yearly_test2_contrib.csv") print("\nSaved: data/permanent_yearly_test1_reset.csv") print("Saved: data/permanent_yearly_test2_contrib.csv") if __name__ == "__main__": main()