#!/usr/bin/env bash # Clean same-policy harness-vs-naive ablation on dash1. # # This is intended as the first robustness gate for harness evaluation: # both arms use the same study substrate and the same configured LLM endpoint; # the only intended difference is llm.use_harness. set -euo pipefail RUN_LABEL="${AITUNER_RUN_ID:-qwen27b-clean-pair-$(date -u +%Y%m%dT%H%M%SZ)}" MAX_TRIALS="${MAX_TRIALS:-12}" ROOT="$(pwd)" HARNESS_STORE=".aituner/${RUN_LABEL}-harness" NAIVE_STORE=".aituner/${RUN_LABEL}-naive" REPORT_ROOT=".aituner-reports/${RUN_LABEL}" SPEC_PATH=".aituner-reports/${RUN_LABEL}.spec.json" read_key() { if [ -z "${OPENAI_API_KEY:-}" ]; then export OPENAI_API_KEY OPENAI_API_KEY="$(python3 -c 'import json,pathlib;print(json.load(open(pathlib.Path.home()/".codex/auth.json"))["OPENAI_API_KEY"])')" fi } export http_proxy= https_proxy= all_proxy= HTTP_PROXY= HTTPS_PROXY= ALL_PROXY= no_proxy='*' mkdir -p .aituner .aituner-reports rm -rf "${HARNESS_STORE}" "${NAIVE_STORE}" "${REPORT_ROOT}" "${SPEC_PATH}" read_key echo "=== harness ON clean pair start $(date -Is) label=${RUN_LABEL} ===" PYTHONPATH=src python3 -m aituner.cli study tune \ --spec configs/examples/dash0_qwen27b_ablation_harness_on.json \ --store-root "${HARNESS_STORE}" --max-trials "${MAX_TRIALS}" --skip-baseline \ > ".aituner/${RUN_LABEL}-harness.log" 2>&1 echo "=== harness ON clean pair done $(date -Is) ===" read_key echo "=== naive OFF clean pair start $(date -Is) label=${RUN_LABEL} ===" PYTHONPATH=src python3 -m aituner.cli study tune \ --spec configs/examples/dash0_qwen27b_ablation_naive_off.json \ --store-root "${NAIVE_STORE}" --max-trials "${MAX_TRIALS}" --skip-baseline \ > ".aituner/${RUN_LABEL}-naive.log" 2>&1 echo "=== naive OFF clean pair done $(date -Is) ===" python3 - <