Add weekly strategy report comparing top strategies to baselines
This commit is contained in:
309
weekly_strategy_report.py
Normal file
309
weekly_strategy_report.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Weekly top-strategy report against market baselines."""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
|
||||
import data_manager
|
||||
|
||||
INITIAL_VALUE = 10_000.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Baseline:
|
||||
label: str
|
||||
yahoo_symbol: str | None = None
|
||||
cache_symbol: str | None = None
|
||||
sohu_code: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class StrategyEquity:
|
||||
name: str
|
||||
series: pd.Series
|
||||
total_return: float
|
||||
|
||||
|
||||
BASELINES = {
|
||||
"us": [
|
||||
Baseline("NASDAQ Composite", yahoo_symbol="^IXIC"),
|
||||
Baseline("SPY", yahoo_symbol="SPY", cache_symbol="SPY"),
|
||||
],
|
||||
"cn": [
|
||||
Baseline("CSI 300", yahoo_symbol="000300.SS", cache_symbol="000300.SS", sohu_code="zs_000300"),
|
||||
Baseline("CSI 800", yahoo_symbol="000906.SS", sohu_code="zs_000906"),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _state_name(path: str, market: str) -> str:
|
||||
base = Path(path).name
|
||||
prefix = f"trader_{market}_"
|
||||
return base[len(prefix):-len(".json")]
|
||||
|
||||
|
||||
def load_strategies(market: str, include_sim: bool) -> list[StrategyEquity]:
|
||||
rows: list[StrategyEquity] = []
|
||||
for path in sorted(glob.glob(f"data/trader_{market}_*.json")):
|
||||
name = _state_name(path, market)
|
||||
if name.startswith("sim_") and not include_sim:
|
||||
continue
|
||||
|
||||
state = json.loads(Path(path).read_text())
|
||||
daily_equity = state.get("daily_equity", {}) or {}
|
||||
if len(daily_equity) < 2:
|
||||
continue
|
||||
|
||||
series = pd.Series(daily_equity, dtype=float)
|
||||
series.index = pd.to_datetime(series.index)
|
||||
series = series.sort_index()
|
||||
|
||||
initial = float(state.get("initial_capital") or series.iloc[0] or INITIAL_VALUE)
|
||||
total_return = series.iloc[-1] / initial - 1.0 if initial else 0.0
|
||||
rows.append(StrategyEquity(name=name, series=series, total_return=total_return))
|
||||
|
||||
rows.sort(key=lambda row: row.total_return, reverse=True)
|
||||
return rows
|
||||
|
||||
|
||||
def _close_from_yahoo_frame(raw: pd.DataFrame, symbol: str) -> pd.Series:
|
||||
if raw.empty:
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
if isinstance(raw.columns, pd.MultiIndex):
|
||||
if "Close" not in raw.columns.get_level_values(0):
|
||||
return pd.Series(dtype=float)
|
||||
close = raw["Close"]
|
||||
if isinstance(close, pd.DataFrame):
|
||||
if symbol in close.columns:
|
||||
return close[symbol].dropna().astype(float)
|
||||
if len(close.columns) == 1:
|
||||
return close.iloc[:, 0].dropna().astype(float)
|
||||
return close.dropna().astype(float)
|
||||
|
||||
if "Close" not in raw.columns:
|
||||
return pd.Series(dtype=float)
|
||||
return raw["Close"].dropna().astype(float)
|
||||
|
||||
|
||||
def download_yahoo_close(symbol: str, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
|
||||
end_exclusive = (end + timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
raw = yf.download(
|
||||
symbol,
|
||||
start=start.strftime("%Y-%m-%d"),
|
||||
end=end_exclusive,
|
||||
auto_adjust=True,
|
||||
progress=False,
|
||||
)
|
||||
series = _close_from_yahoo_frame(raw, symbol)
|
||||
series.index = pd.to_datetime(series.index).tz_localize(None)
|
||||
return series
|
||||
|
||||
|
||||
def download_sohu_close(code: str, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
|
||||
params = {
|
||||
"code": code,
|
||||
"start": start.strftime("%Y%m%d"),
|
||||
"end": end.strftime("%Y%m%d"),
|
||||
"stat": "1",
|
||||
"order": "D",
|
||||
"period": "d",
|
||||
"callback": "historySearchHandler",
|
||||
"rt": "jsonp",
|
||||
}
|
||||
url = "https://q.stock.sohu.com/hisHq?" + urlencode(params)
|
||||
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
||||
text = ""
|
||||
for attempt in range(3):
|
||||
try:
|
||||
with urlopen(req, timeout=20) as resp:
|
||||
text = resp.read().decode("gbk").strip()
|
||||
break
|
||||
except Exception:
|
||||
if attempt == 2:
|
||||
return pd.Series(dtype=float)
|
||||
time.sleep(1)
|
||||
|
||||
prefix = "historySearchHandler("
|
||||
if not text.startswith(prefix) or not text.endswith(")"):
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
payload = json.loads(text[len(prefix):-1])
|
||||
if not payload or payload[0].get("status") != 0:
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
rows = payload[0].get("hq", [])
|
||||
if not rows:
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
data = {row[0]: float(row[2]) for row in rows if len(row) >= 3}
|
||||
series = pd.Series(data, dtype=float)
|
||||
series.index = pd.to_datetime(series.index)
|
||||
return series.sort_index()
|
||||
|
||||
|
||||
def clip_dates(series: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
|
||||
if series.empty:
|
||||
return series
|
||||
clipped = series.copy()
|
||||
clipped.index = pd.to_datetime(clipped.index).tz_localize(None)
|
||||
return clipped.loc[(clipped.index >= start) & (clipped.index <= end)].dropna()
|
||||
|
||||
|
||||
def load_baseline(market: str, baseline: Baseline, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
|
||||
if baseline.sohu_code:
|
||||
series = download_sohu_close(baseline.sohu_code, start, end)
|
||||
else:
|
||||
series = pd.Series(dtype=float)
|
||||
|
||||
series = clip_dates(series, start, end)
|
||||
if len(series) < 2:
|
||||
cached = data_manager.load(market)
|
||||
if cached is not None and baseline.cache_symbol in cached.columns:
|
||||
series = cached[baseline.cache_symbol].dropna().astype(float)
|
||||
else:
|
||||
series = pd.Series(dtype=float)
|
||||
|
||||
series = clip_dates(series, start, end)
|
||||
if len(series) < 2 and baseline.yahoo_symbol:
|
||||
series = download_yahoo_close(baseline.yahoo_symbol, start, end)
|
||||
|
||||
series = clip_dates(series, start, end)
|
||||
if len(series) < 2 and baseline.sohu_code:
|
||||
series = download_sohu_close(baseline.sohu_code, start, end)
|
||||
|
||||
series = clip_dates(series, start, end)
|
||||
if series.empty:
|
||||
return series
|
||||
return series / series.iloc[0] * INITIAL_VALUE
|
||||
|
||||
|
||||
def select_period(strategies: list[StrategyEquity], start: str | None, end: str | None) -> tuple[pd.Timestamp, pd.Timestamp]:
|
||||
if start:
|
||||
start_ts = pd.Timestamp(start)
|
||||
else:
|
||||
start_ts = max(row.series.index.min() for row in strategies)
|
||||
|
||||
if end:
|
||||
end_ts = pd.Timestamp(end)
|
||||
else:
|
||||
end_ts = min(row.series.index.max() for row in strategies)
|
||||
|
||||
if start_ts > end_ts:
|
||||
raise ValueError(f"Invalid period: start {start_ts.date()} is after end {end_ts.date()}")
|
||||
return start_ts, end_ts
|
||||
|
||||
|
||||
def weekly_last(frame: pd.DataFrame) -> pd.DataFrame:
|
||||
rows = []
|
||||
for _, group in frame.groupby(pd.Grouper(freq="W-FRI")):
|
||||
group = group.dropna(how="all")
|
||||
if group.empty:
|
||||
continue
|
||||
last = group.iloc[-1].copy()
|
||||
last.name = group.index[-1].strftime("%Y-%m-%d")
|
||||
rows.append(last)
|
||||
if not rows:
|
||||
return pd.DataFrame(columns=frame.columns)
|
||||
result = pd.DataFrame(rows)
|
||||
result.index.name = "week_date"
|
||||
return result
|
||||
|
||||
|
||||
def build_market_report(
|
||||
market: str,
|
||||
top: int,
|
||||
include_sim: bool,
|
||||
start: str | None,
|
||||
end: str | None,
|
||||
) -> tuple[pd.DataFrame, pd.DataFrame, list[str]]:
|
||||
strategies = load_strategies(market, include_sim=include_sim)
|
||||
if not strategies:
|
||||
raise RuntimeError(f"No strategy equity data found for market '{market}'")
|
||||
|
||||
selected = strategies[:top]
|
||||
start_ts, end_ts = select_period(selected, start, end)
|
||||
|
||||
frame = pd.DataFrame({
|
||||
row.name: row.series.loc[(row.series.index >= start_ts) & (row.series.index <= end_ts)]
|
||||
for row in selected
|
||||
})
|
||||
|
||||
warnings = []
|
||||
for baseline in BASELINES[market]:
|
||||
series = load_baseline(market, baseline, start_ts, end_ts)
|
||||
if series.empty:
|
||||
warnings.append(f"{market.upper()} baseline '{baseline.label}' has no data for {start_ts.date()} to {end_ts.date()}")
|
||||
continue
|
||||
frame[baseline.label] = series
|
||||
|
||||
weekly_values = weekly_last(frame)
|
||||
weekly_returns = (weekly_values / INITIAL_VALUE - 1.0) * 100.0
|
||||
return weekly_values, weekly_returns, warnings
|
||||
|
||||
|
||||
def write_outputs(market: str, values: pd.DataFrame, returns: pd.DataFrame, output_dir: Path) -> tuple[Path, Path]:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
value_path = output_dir / f"weekly_{market}_top10_vs_baselines.csv"
|
||||
return_path = output_dir / f"weekly_{market}_top10_vs_baselines_returns.csv"
|
||||
values.round(2).to_csv(value_path)
|
||||
returns.round(4).to_csv(return_path)
|
||||
return value_path, return_path
|
||||
|
||||
|
||||
def print_returns(market: str, returns: pd.DataFrame) -> None:
|
||||
print(f"\n{market.upper()} weekly return %")
|
||||
if returns.empty:
|
||||
print(" No weekly rows.")
|
||||
return
|
||||
printable = returns.copy()
|
||||
printable = printable.map(lambda value: "" if pd.isna(value) else f"{value:+.2f}%")
|
||||
print(printable.to_string())
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Compare each market's top10 live strategies with weekly baseline data."
|
||||
)
|
||||
parser.add_argument("--market", choices=["all", "us", "cn"], default="all")
|
||||
parser.add_argument("--top", type=int, default=10)
|
||||
parser.add_argument("--start", help="Optional start date, YYYY-MM-DD")
|
||||
parser.add_argument("--end", help="Optional end date, YYYY-MM-DD")
|
||||
parser.add_argument("--include-sim", action="store_true")
|
||||
parser.add_argument("--output-dir", default="data")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
markets = ["us", "cn"] if args.market == "all" else [args.market]
|
||||
output_dir = Path(args.output_dir)
|
||||
|
||||
for market in markets:
|
||||
values, returns, warnings = build_market_report(
|
||||
market=market,
|
||||
top=args.top,
|
||||
include_sim=args.include_sim,
|
||||
start=args.start,
|
||||
end=args.end,
|
||||
)
|
||||
value_path, return_path = write_outputs(market, values, returns, output_dir)
|
||||
print_returns(market, returns)
|
||||
print(f" values: {value_path}")
|
||||
print(f" returns: {return_path}")
|
||||
for warning in warnings:
|
||||
print(f" warning: {warning}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user