"""Shared helpers for quality tasks. Each task can be backed by a pre-fetched local JSON file (so the GPU host doesn't need network). The JSON is a list of records: [{"id": str, "problem": str, "answer": str, "source": str}, ...] Use tools/bench/fetch_datasets.py on a networked machine to produce these files, then ship them to the GPU host (the bench sync does this automatically). """ from __future__ import annotations import json import os from typing import Any def data_dir() -> str: """Directory holding pre-fetched dataset JSON. Override via BENCH_DATA_DIR.""" return os.environ.get( "BENCH_DATA_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "data"), ) def local_json_path(task_name: str) -> str: return os.path.normpath(os.path.join(data_dir(), f"{task_name}.json")) def load_local(task_name: str) -> list[dict[str, Any]] | None: """Return records from the local JSON file if present, else None.""" path = local_json_path(task_name) if not os.path.isfile(path): return None with open(path) as f: records = json.load(f) print(f"[tasks] loaded {len(records)} records from {path}") return records def save_local(task_name: str, records: list[dict[str, Any]]) -> str: path = local_json_path(task_name) os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: json.dump(records, f, ensure_ascii=False, indent=1) return path