"""Pre-fetch quality-benchmark datasets into local JSON. Run this on a machine WITH network (e.g. your laptop). The resulting tools/bench/data/*.json files are then shipped to the GPU host (which has no network) by the bench sync step. Usage: python3 -m tools.bench.fetch_datasets # all tasks python3 -m tools.bench.fetch_datasets aime2025 # one task """ from __future__ import annotations import os import sys if __package__ in (None, ""): sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from tools.bench.tasks import aime, gsm8k, save_local FETCHERS = { "aime2025": aime.load_remote, "gsm8k": gsm8k.load_remote, } def main() -> None: wanted = sys.argv[1:] or list(FETCHERS) for name in wanted: if name not in FETCHERS: raise SystemExit(f"unknown task: {name} (have: {', '.join(FETCHERS)})") print(f"[fetch] {name} ...") records = FETCHERS[name]() path = save_local(name, records) print(f"[fetch] {name}: {len(records)} records -> {path}") if __name__ == "__main__": main()