#!/usr/bin/env bash
# Run a clean same-policy harness-vs-naive pair from one or two base specs.
#
# Steps:
#   1. Validate numeric/JSON inputs, then delete outputs left by a previous
#      run with the same RUN_LABEL (stores, report, report spec, DONE marker).
#   2. Generate harness.json / naive.json under .aituner-run-configs/RUN_LABEL
#      by deep-merging the optional patch files into the base spec(s).
#   3. Run `aituner.cli study tune` for the harness arm, then the naive arm.
#   4. Write a report spec and run scripts/tuning_report.py on it.
#   5. Touch .aituner/RUN_LABEL.DONE.
#
# All paths are relative to the directory the script is started from.
#
# Required env:
#   RUN_LABEL
#   CASE_ID
#   HARNESS_BASE_SPEC
#
# Optional env:
#   NAIVE_BASE_SPEC            defaults to HARNESS_BASE_SPEC
#   MAX_TRIALS                 defaults to 12
#   CASE_DESCRIPTION
#   CASE_TAGS_JSON             JSON list, defaults to []
#   BUDGETS_JSON               JSON list, defaults to [1,2,3,4,6,8,MAX_TRIALS]
#   COMMON_SPEC_PATCH_FILE     JSON deep-merged into both generated specs
#   HARNESS_SPEC_PATCH_FILE    JSON deep-merged into harness generated spec
#   NAIVE_SPEC_PATCH_FILE      JSON deep-merged into naive generated spec
#   OPENAI_API_KEY             if empty, read from ~/.codex/auth.json

set -euo pipefail

RUN_LABEL="${RUN_LABEL:?RUN_LABEL is required}"
CASE_ID="${CASE_ID:?CASE_ID is required}"
HARNESS_BASE_SPEC="${HARNESS_BASE_SPEC:?HARNESS_BASE_SPEC is required}"
NAIVE_BASE_SPEC="${NAIVE_BASE_SPEC:-${HARNESS_BASE_SPEC}}"
MAX_TRIALS="${MAX_TRIALS:-12}"
CASE_DESCRIPTION="${CASE_DESCRIPTION:-Clean same-policy harness-vs-naive pair.}"
CASE_TAGS_JSON="${CASE_TAGS_JSON:-[]}"
BUDGETS_JSON="${BUDGETS_JSON:-}"

ROOT="$(pwd)"
RUN_CONFIG_ROOT=".aituner-run-configs/${RUN_LABEL}"
HARNESS_SPEC="${RUN_CONFIG_ROOT}/harness.json"
NAIVE_SPEC="${RUN_CONFIG_ROOT}/naive.json"
HARNESS_STORE=".aituner/${RUN_LABEL}-harness"
NAIVE_STORE=".aituner/${RUN_LABEL}-naive"
REPORT_ROOT=".aituner-reports/${RUN_LABEL}"
REPORT_SPEC=".aituner-reports/${RUN_LABEL}.spec.json"
DONE_MARKER=".aituner/${RUN_LABEL}.DONE"

# The embedded Python steps read their inputs from the environment.
export RUN_LABEL CASE_ID HARNESS_BASE_SPEC NAIVE_BASE_SPEC MAX_TRIALS CASE_DESCRIPTION
export CASE_TAGS_JSON BUDGETS_JSON ROOT RUN_CONFIG_ROOT HARNESS_SPEC NAIVE_SPEC
export HARNESS_STORE NAIVE_STORE REPORT_ROOT REPORT_SPEC

#######################################
# Ensure OPENAI_API_KEY is exported.
# When the variable is unset or empty, load the "OPENAI_API_KEY" entry from
# ~/.codex/auth.json; an existing non-empty value is left untouched.
# Globals:   OPENAI_API_KEY (read, possibly written and exported)
# Returns:   non-zero (aborting the script under `set -e`) if the file
#            cannot be read or lacks the key.
#######################################
read_key() {
  if [ -z "${OPENAI_API_KEY:-}" ]; then
    # Export first, assign second, so a failing lookup is not masked.
    export OPENAI_API_KEY
    OPENAI_API_KEY="$(python3 -c 'import json,pathlib;print(json.load(open(pathlib.Path.home()/".codex/auth.json"))["OPENAI_API_KEY"])')"
  fi
}

#######################################
# Run one tuning arm and log its output to .aituner/RUN_LABEL-<arm>.log.
# Globals:   RUN_LABEL, MAX_TRIALS (read)
# Arguments: $1 - arm name ("harness" or "naive"), used in banners/log name
#            $2 - path of the generated spec for this arm
#            $3 - store root for this arm
# Outputs:   start/done banners on stdout; on failure, a pointer to the log
#            file on stderr.
# Returns:   exits the script with the tuner's exit code on failure.
#######################################
run_arm() {
  local arm="$1" spec="$2" store="$3"
  local log=".aituner/${RUN_LABEL}-${arm}.log"
  local status=0

  read_key
  echo "=== ${arm} clean pair start $(date -Is) label=${RUN_LABEL} ==="
  PYTHONPATH=src python3 -m aituner.cli study tune \
    --spec "${spec}" \
    --store-root "${store}" --max-trials "${MAX_TRIALS}" --skip-baseline \
    > "${log}" 2>&1 || status=$?
  if (( status != 0 )); then
    # The tuner's own output went to the log, so say where to look.
    echo "${arm} tuning failed with exit code ${status}; see ${log}" >&2
    exit "${status}"
  fi
  echo "=== ${arm} clean pair done $(date -Is) ==="
}

# Clear every proxy variable and exclude all hosts from proxying.
export http_proxy= https_proxy= all_proxy= HTTP_PROXY= HTTPS_PROXY= ALL_PROXY= no_proxy='*'

# Fail fast: these values are otherwise only parsed when the report spec is
# built, i.e. after both tuning runs have finished. The checks use the same
# conversions as that later step, so the set of accepted inputs is unchanged.
python3 - <<'PY'
import json
import os

try:
    int(os.environ["MAX_TRIALS"])
except ValueError:
    raise SystemExit("MAX_TRIALS must be an integer")

budgets_text = os.environ.get("BUDGETS_JSON") or ""
if budgets_text:
    try:
        for item in json.loads(budgets_text):
            int(item)
    except (TypeError, ValueError):
        raise SystemExit("BUDGETS_JSON must be a JSON list of integers")

try:
    json.loads(os.environ.get("CASE_TAGS_JSON") or "[]")
except ValueError:
    raise SystemExit("CASE_TAGS_JSON must be valid JSON")
PY

mkdir -p "${RUN_CONFIG_ROOT}" .aituner .aituner-reports
# Start from a clean slate. The DONE marker of a previous run with the same
# label is removed too, so it cannot signal completion of this run early.
rm -rf -- "${HARNESS_STORE:?}" "${NAIVE_STORE:?}" "${REPORT_ROOT:?}" \
  "${REPORT_SPEC:?}" "${DONE_MARKER:?}"

# Generate the per-arm specs and print their study ids as one JSON line.
python3 - <<'PY'
import json
import os
from pathlib import Path
from typing import Any


def deep_merge(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``base`` with ``patch`` merged in.

    Dict values present on both sides are merged recursively; any other
    patch value replaces the base value. ``base`` itself is not modified.
    """
    merged = dict(base)
    for key, value in patch.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


def load_patch(env_name: str) -> dict[str, Any]:
    """Load the JSON object from the file named by env var ``env_name``.

    Returns an empty dict when the variable is unset or empty; exits when
    the file does not contain a JSON object.
    """
    path = os.environ.get(env_name)
    if not path:
        return {}
    payload = json.loads(Path(path).read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise SystemExit(f"{env_name} must point to a JSON object")
    return payload


def generated_spec(base_path: str, *, use_harness: bool, suffix: str, arm_patch: dict[str, Any]) -> dict[str, Any]:
    """Build one arm's spec from ``base_path``.

    Applies the common patch, then ``arm_patch``, appends ``-suffix`` to the
    study id (falling back to CASE_ID when the spec has none) and sets
    ``llm.use_harness``.
    """
    base = json.loads(Path(base_path).read_text(encoding="utf-8"))
    if not isinstance(base, dict):
        raise SystemExit(f"{base_path} must contain a JSON object")
    common = load_patch("COMMON_SPEC_PATCH_FILE")
    spec = deep_merge(base, common)
    spec = deep_merge(spec, arm_patch)
    spec["study_id"] = str(spec.get("study_id") or os.environ["CASE_ID"]) + f"-{suffix}"
    llm = dict(spec.get("llm") or {})
    llm["use_harness"] = use_harness
    spec["llm"] = llm
    return spec


run_config_root = Path(os.environ["RUN_CONFIG_ROOT"])
harness = generated_spec(
    os.environ["HARNESS_BASE_SPEC"],
    use_harness=True,
    suffix="harness",
    arm_patch=load_patch("HARNESS_SPEC_PATCH_FILE"),
)
naive = generated_spec(
    os.environ["NAIVE_BASE_SPEC"],
    use_harness=False,
    suffix="naive",
    arm_patch=load_patch("NAIVE_SPEC_PATCH_FILE"),
)
(run_config_root / "harness.json").write_text(json.dumps(harness, indent=2) + "\n", encoding="utf-8")
(run_config_root / "naive.json").write_text(json.dumps(naive, indent=2) + "\n", encoding="utf-8")
print(json.dumps({"harness_study_id": harness["study_id"], "naive_study_id": naive["study_id"]}, ensure_ascii=False))
PY

run_arm harness "${HARNESS_SPEC}" "${HARNESS_STORE}"
run_arm naive "${NAIVE_SPEC}" "${NAIVE_STORE}"

# Write the report spec that points the report tool at both study roots.
python3 - <<'PY'
import json
import os
from pathlib import Path

root = Path(os.environ["ROOT"])
run_label = os.environ["RUN_LABEL"]
harness = json.loads(Path(os.environ["HARNESS_SPEC"]).read_text(encoding="utf-8"))
naive = json.loads(Path(os.environ["NAIVE_SPEC"]).read_text(encoding="utf-8"))
max_trials = int(os.environ["MAX_TRIALS"])
budgets_text = os.environ.get("BUDGETS_JSON") or ""
if budgets_text:
    budgets = json.loads(budgets_text)
else:
    budgets = [1, 2, 3, 4, 6, 8, max_trials]
# De-duplicate, drop non-positive entries, and sort ascending.
budgets = sorted({int(item) for item in budgets if int(item) > 0})
tags = json.loads(os.environ.get("CASE_TAGS_JSON") or "[]")

spec = {
    "report_id": run_label,
    "output_root": str(root / os.environ["REPORT_ROOT"]),
    "target_fraction": 0.95,
    "min_final_ratio": 0.98,
    "cases": [
        {
            "case_id": os.environ["CASE_ID"],
            "description": os.environ["CASE_DESCRIPTION"],
            "tags": tags,
            "budgets": budgets,
            "arms": [
                {
                    "name": "harness",
                    "kind": "harness",
                    "study_root": str(
                        root / os.environ["HARNESS_STORE"] / harness["study_id"]
                    ),
                },
                {
                    "name": "naive",
                    "kind": "naive",
                    "study_root": str(root / os.environ["NAIVE_STORE"] / naive["study_id"]),
                },
            ],
        }
    ],
}
Path(os.environ["REPORT_SPEC"]).write_text(json.dumps(spec, indent=2) + "\n", encoding="utf-8")
PY

PYTHONPATH=src python3 scripts/tuning_report.py --spec "${REPORT_SPEC}"
# Only reached when every step above succeeded.
touch "${DONE_MARKER}"
echo "=== clean pair report ready ${REPORT_ROOT} $(date -Is) ==="