From 95c02d7dd9c40d114d43f6ec5811461ec22ef59e Mon Sep 17 00:00:00 2001
From: Gahow Wang
Date: Thu, 18 Jun 2026 09:06:05 +0800
Subject: [PATCH] Fig-18: chained driver for 2 extra naive runs (n=3 nondeterminism)

A single naive run can luck into the TP4 optimum at iter 1 (gpt-5.4 free-form
guess), which weakens the single-curve story. Run naive 2 more times on the same
real-output substrate to capture the fail/slow/lucky spread -- the actual finding.
Waits for ABLATION12_DONE so it never contends for GPUs with the main pair.

Co-Authored-By: Claude Opus 4.8
---
Note: the script below was revised by hand after `git format-patch` (hunk and
diffstat counts updated); the blob id on the index line still refers to the
original 26-line version.

 scripts/run_naive_repeats_d1.sh | 56 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 scripts/run_naive_repeats_d1.sh

diff --git a/scripts/run_naive_repeats_d1.sh b/scripts/run_naive_repeats_d1.sh
new file mode 100644
index 0000000..2ff94de
--- /dev/null
+++ b/scripts/run_naive_repeats_d1.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Fig-18 naive nondeterminism: after the main pair (ABLATION12_DONE) finishes, run
+# 2 more naive arms (runs 2 and 3) on the SAME substrate. The naive LLM (gpt-5.4,
+# use_harness=false) is nondeterministic, so the run-to-run spread (fail / slow /
+# lucky) is the result. Harness arm stays a single deterministic curve. Run from
+# the repo root on dash1; survives disconnect via setsid/nohup at launch.
+#
+# Writes .aituner/abl12-naiveN/ and .aituner/abl12-naiveN.log per run (N = 2, 3),
+# then touches .aituner/NAIVE_REPEATS_DONE once both runs have been attempted.
+# Exits non-zero if a precondition fails or any run exits non-zero.
+set -u
+
+readonly SPEC=configs/examples/dash0_qwen27b_ablation_naive_off.json
+
+die() { printf 'error: %s\n' "$*" >&2; exit 1; }
+
+# Validate inputs before the (potentially long) wait below rather than after it.
+[ -f "$SPEC" ] || die "spec ${SPEC} not found; run from the repo root"
+
+# Assign, check, then export: `export VAR=$(cmd)` returns export's status, so a
+# missing or malformed auth.json would otherwise leave the key silently empty.
+OPENAI_API_KEY=$(python3 -c 'import json,pathlib;print(json.load(open(pathlib.Path.home()/".codex/auth.json"))["OPENAI_API_KEY"])') \
+  || die "could not read OPENAI_API_KEY from ~/.codex/auth.json"
+[ -n "$OPENAI_API_KEY" ] || die "OPENAI_API_KEY in ~/.codex/auth.json is empty"
+export OPENAI_API_KEY
+# Blank every proxy variable and set no_proxy='*' so no proxy is used.
+export http_proxy= https_proxy= all_proxy= HTTP_PROXY= HTTPS_PROXY= ALL_PROXY= no_proxy='*'
+
+# Wait for the main harness+naive(run1) pair to complete so we never contend for GPUs.
+echo "=== waiting for ABLATION12_DONE $(date -Is) ==="
+while [ ! -f .aituner/ABLATION12_DONE ]; do sleep 120; done
+echo "=== main pair done, starting naive repeats $(date -Is) ==="
+
+failed=0
+for r in 2 3; do
+  store=".aituner/abl12-naive${r}"
+  log="${store}.log"
+  # Start each repeat from a clean store and log.
+  rm -rf -- "$store" "$log"
+  echo "=== naive run ${r} start $(date -Is) ==="
+  if PYTHONPATH=src python3 -m aituner.cli study tune \
+    --spec "$SPEC" \
+    --store-root "$store" --max-trials 12 --skip-baseline > "$log" 2>&1; then
+    echo "=== naive run ${r} done $(date -Is) ==="
+  else
+    status=$?
+    failed=1
+    # Keep going (each repeat has its own store), but do not report a crash as "done".
+    echo "=== naive run ${r} FAILED (exit ${status}) $(date -Is) ===" >&2
+  fi
+done
+
+# Touched even after a failed run, as before: the marker records that the
+# repeats finished, not that they succeeded.
+touch .aituner/NAIVE_REPEATS_DONE
+echo "=== all naive repeats done $(date -Is) ==="
+exit "$failed"