Add Stop-B authority: deterministic validator overrides LLM stop

Phase 4 of the two-stop work. The harness already pre-empts the LLM with
deterministic stops and guided probes, but an LLM-originated should_stop could
still end the loop while the validator saw remaining opportunity.

Add harness._stop_authority, exposed as context["stop_authority"], whose
`authorized` mirrors the deterministic harness stop decision and whose
`opportunity_remains` flags an open topology frontier or a high-value planned
candidate. In `study tune`, an LLM-originated should_stop is now honored only when
the validator authorizes it; an unauthorized stop is vetoed until a bounded veto
budget is spent, so the loop cannot converge prematurely on the agent's say-so.
File- and harness-originated stops are unaffected, and the stop reason chain is
recorded.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 14:45:14 +08:00
parent 51a9e4a007
commit a8f903498d
3 changed files with 150 additions and 3 deletions

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import json
import contextlib
import io
import math
import os
@@ -418,6 +419,23 @@ class CoreFlowTests(unittest.TestCase):
self.assertTrue(early)
self.assertTrue(any(c["family_similarity"]["C"] < 0.9 for c in early))
def test_stop_authority_mirrors_validator_and_blocks_fresh_stop(self) -> None:
    """Check that ``stop_authority`` agrees with ``harness_stop`` on a fresh study.

    Builds a harness context for a study whose state has an empty trial
    list, then verifies that the authority's ``authorized`` flag equals the
    harness ``should_stop`` decision and that it is falsy.
    """
    with tempfile.TemporaryDirectory() as workdir:
        spec_path = _write_study_assets(Path(workdir))
        study = load_study_spec(spec_path)
        fresh_state = StudyState(study_id=study.study_id, trials=[])
        context = build_harness_context(
            study=study,
            window_summary={},
            state=fresh_state,
        )
        stop_authority = context["stop_authority"]
        authorized = stop_authority["authorized"]
        harness_decision = context["harness_stop"]["should_stop"]
        # The authority is expected to be the deterministic validator's
        # verdict, so the two values must match exactly.
        self.assertEqual(authorized, harness_decision)
        # No trials have completed yet, so a stop must not be authorized.
        self.assertFalse(stop_authority["authorized"])
def test_adaptive_replay_set_truncates_only_when_enabled(self) -> None:
from types import SimpleNamespace
@@ -3956,6 +3974,56 @@ class CoreFlowTests(unittest.TestCase):
state = store.load_state("study-1")
self.assertEqual(state.next_trial_index, 1)
def test_cli_tune_vetoes_unauthorized_llm_stop(self) -> None:
    """Check that ``study tune`` vetoes an LLM stop the validator did not authorize.

    The LLM call is patched to always return a proposal with
    ``should_stop`` set, and ``run_trial`` is patched so no trial can
    actually execute. The JSON printed by the CLI must contain at least one
    vetoed entry, and the final stopped entry must be attributed to
    ``llm_after_veto_budget``.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        study_path = _write_study_assets(workdir)

        # Point the study at a custom LLM endpoint; the LLM call itself is
        # patched below.
        spec = json.loads(study_path.read_text(encoding="utf-8"))
        spec["llm"]["endpoint"] = {
            "provider": "custom",
            "base_url": "http://localhost:9/v1",
            "model": "test-model",
            "api_key_env": "AITUNER_TEST_KEY",
        }
        study_path.write_text(json.dumps(spec), encoding="utf-8")

        store_root = workdir / "store"
        # Canned LLM reply: an empty patch plus a request to stop.
        llm_reply = json.dumps(
            {
                "observation": "looks done",
                "diagnosis": "agent thinks it converged",
                "config_patch": {"env_patch": {}, "flag_patch": {}},
                "expected_effects": ["stop"],
                "why_not_previous_failures": "n/a",
                "should_stop": True,
            }
        )
        argv = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
        ]

        captured = io.StringIO()
        with mock.patch("aituner.cli.run_trial") as run_trial_mock, mock.patch(
            "aituner.cli.call_llm_for_proposal", return_value=llm_reply
        ):
            with contextlib.redirect_stdout(captured):
                exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        run_trial_mock.assert_not_called()

        executed = json.loads(captured.getvalue())["executed_trials"]
        # The first unauthorized LLM stop is vetoed; the second is honored
        # only after the veto budget is spent.
        vetoed = [entry for entry in executed if entry.get("stop_vetoed")]
        self.assertTrue(vetoed)
        honored = [entry for entry in executed if entry.get("stopped")]
        self.assertTrue(honored)
        last_honored = honored[-1]
        self.assertEqual(
            last_honored["stop_authorized_by"], "llm_after_veto_budget"
        )
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)