Initial commit: quant backtesting framework with daily trading simulator

Backtesting engine supporting 11 strategies across US (S&P 500) and CN (CSI 300)
markets with open-to-close execution, proportional + fixed per-trade fees.

Daily trader (trader.py) with auto/morning/evening/simulate/status commands
and cron-friendly `auto` mode for unattended daily runs on a server.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-05 00:41:19 +08:00
commit 42218741d4
23 changed files with 3136 additions and 0 deletions

135
universe.py Normal file
View File

@@ -0,0 +1,135 @@
"""
Stock universe providers — fetch index constituents for backtesting.
Each function returns a dict with 'tickers' (list[str]) and 'benchmark' (str)
compatible with Yahoo Finance ticker format. Results are cached locally so
Wikipedia / web scrapes only happen once per day.
"""
import io
import json
import os
import urllib.request
from datetime import date
import pandas as pd
# Browser-like User-Agent: Wikipedia rejects the default urllib UA with 403.
_HEADERS = {"User-Agent": "Mozilla/5.0 (quant-backtest)"}
def _fetch_html_tables(url: str) -> list[pd.DataFrame]:
    """Download *url* with a browser-like User-Agent and parse all HTML tables."""
    request = urllib.request.Request(url, headers=_HEADERS)
    with urllib.request.urlopen(request) as response:
        page = response.read().decode("utf-8")
    # pandas wants a file-like object (passing raw HTML strings is deprecated).
    return pd.read_html(io.StringIO(page))
# Directory where per-universe ticker snapshots are cached as JSON (one per day).
CACHE_DIR = "data"
def _read_cache(name: str) -> list[str] | None:
    """Return today's cached ticker list for universe *name*.

    Returns None when the cache file is absent, stale (dated before today),
    unreadable, or malformed — callers then re-fetch from the network.
    """
    path = os.path.join(CACHE_DIR, f"universe_{name}.json")
    if not os.path.exists(path):
        return None
    try:
        with open(path) as fh:
            payload = json.load(fh)
        # Cache is valid for one calendar day only.
        if payload.get("date") == str(date.today()):
            return payload["tickers"]
    except Exception:
        # Best-effort cache: any parse/IO problem just forces a re-fetch.
        pass
    return None
def _write_cache(name: str, tickers: list[str]) -> None:
    """Persist *tickers* for universe *name*, stamped with today's date."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    payload = {"date": str(date.today()), "tickers": tickers}
    cache_path = os.path.join(CACHE_DIR, f"universe_{name}.json")
    with open(cache_path, "w") as fh:
        json.dump(payload, fh)
# ---------------------------------------------------------------------------
# S&P 500
# ---------------------------------------------------------------------------
def get_sp500() -> list[str]:
    """Return current S&P 500 constituents in Yahoo Finance ticker format.

    Serves today's cached list when available; otherwise scrapes Wikipedia
    and caches the result for the rest of the day.
    """
    cached = _read_cache("sp500")
    if cached:
        print(f"--- Loaded S&P 500 universe from cache ({len(cached)} tickers) ---")
        return cached
    print("--- Fetching S&P 500 constituents from Wikipedia ---")
    tables = _fetch_html_tables(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    )
    # The first table on the page is the constituent list. Yahoo uses '-'
    # where Wikipedia uses '.' in share-class symbols (e.g. BRK.B -> BRK-B).
    symbols = tables[0]["Symbol"].str.replace(".", "-", regex=False)
    tickers = sorted(symbols.tolist())
    _write_cache("sp500", tickers)
    print(f"--- Got {len(tickers)} S&P 500 tickers ---")
    return tickers
# ---------------------------------------------------------------------------
# CSI 300
# ---------------------------------------------------------------------------
# Yahoo Finance format: Shanghai → XXXXXX.SS, Shenzhen → XXXXXX.SZ
# Wikipedia CSI 300 page has a table with stock codes and exchanges.
def get_csi300() -> list[str]:
    """Fetch current CSI 300 constituents from Wikipedia.

    Returns Yahoo Finance tickers: Shanghai listings as ``XXXXXX.SS`` and
    Shenzhen listings as ``XXXXXX.SZ``, sorted and de-duplicated. Results
    are cached for the day via _read_cache/_write_cache.

    Raises:
        RuntimeError: if no plausible constituent table is found on the page.
    """
    cached = _read_cache("csi300")
    if cached:
        print(f"--- Loaded CSI 300 universe from cache ({len(cached)} tickers) ---")
        return cached
    print("--- Fetching CSI 300 constituents from Wikipedia ---")
    url = "https://en.wikipedia.org/wiki/CSI_300_Index"
    tables = _fetch_html_tables(url)
    # Find the constituent table — it has ~300 rows and a "Ticker" column
    # with format like "SSE: 600519" or "SZSE: 300750"
    df = None
    for t in tables:
        if len(t) >= 200 and "Ticker" in t.columns:
            df = t
            break
    if df is None:
        raise RuntimeError("Could not find CSI 300 constituent table on Wikipedia")
    # Explicit exchange -> Yahoo suffix mapping. The previous code defaulted
    # every non-"SSE" prefix to ".SZ", which would silently mislabel a row
    # from any other exchange as Shenzhen; unknown prefixes are now skipped.
    suffix_by_exchange = {"SSE": ".SS", "SZSE": ".SZ"}
    tickers = []
    for _, row in df.iterrows():
        raw = str(row["Ticker"])  # e.g. "SSE: 600519" or "SZSE: 300750"
        exchange, sep, code = raw.partition(":")
        if not sep:
            continue  # no colon at all — not a ticker cell
        suffix = suffix_by_exchange.get(exchange.strip().upper())
        code = code.strip()
        # Skip unknown exchanges and anything whose code isn't purely numeric
        # (also filters malformed cells with extra colons, since the remainder
        # after the first colon would fail isdigit()).
        if suffix is None or not code.isdigit():
            continue
        tickers.append(code + suffix)
    tickers = sorted(set(tickers))
    _write_cache("csi300", tickers)
    print(f"--- Got {len(tickers)} CSI 300 tickers ---")
    return tickers
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
# Registry of supported markets, keyed by the short market code used on the
# command line. Each entry bundles:
#   "fetch"           — zero-arg callable returning the constituent tickers
#   "benchmark"       — Yahoo Finance ticker used as the comparison benchmark
#   "benchmark_label" — human-readable label for reports/plots
UNIVERSES = {
    "us": {
        "fetch": get_sp500,
        "benchmark": "SPY",
        "benchmark_label": "SPY (Benchmark)",
    },
    "cn": {
        "fetch": get_csi300,
        # Yahoo ticker for the CSI 300 index itself (Shanghai-listed).
        "benchmark": "000300.SS",
        "benchmark_label": "CSI 300 (Benchmark)",
    },
}