Add Stop-B authority: deterministic validator overrides LLM stop
Phase 4 of the two-stop work.

The harness already pre-empts the LLM with deterministic stops and guided probes, but an LLM-originated should_stop could still end the loop while the validator saw remaining opportunity.

Add harness._stop_authority, exposed as context["stop_authority"], whose `authorized` mirrors the deterministic harness stop decision and whose `opportunity_remains` flags an open topology frontier or a high-value planned candidate.

In study tune, an LLM-originated should_stop is now honored only when the validator authorizes it; an unauthorized stop is vetoed (bounded budget) so the loop cannot converge prematurely on the agent's say-so. File- and harness-originated stops are unaffected, and the stop reason chain is recorded.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import contextlib
|
||||
import io
|
||||
import math
|
||||
import os
|
||||
@@ -418,6 +419,23 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertTrue(early)
|
||||
self.assertTrue(any(c["family_similarity"]["C"] < 0.9 for c in early))
|
||||
|
||||
def test_stop_authority_mirrors_validator_and_blocks_fresh_stop(self) -> None:
    """Check the stop authority against the harness stop on a fresh study.

    Builds the harness context for a study state with an empty trial list
    and verifies that ``context["stop_authority"]["authorized"]`` equals
    ``context["harness_stop"]["should_stop"]`` and is falsy.
    """
    with tempfile.TemporaryDirectory() as workdir:
        spec_path = _write_study_assets(Path(workdir))
        study = load_study_spec(spec_path)
        fresh_state = StudyState(study_id=study.study_id, trials=[])
        harness_kwargs = {
            "study": study,
            "window_summary": {},
            "state": fresh_state,
        }
        context = build_harness_context(**harness_kwargs)

        stop_authority = context["stop_authority"]
        authorized = stop_authority["authorized"]
        harness_should_stop = context["harness_stop"]["should_stop"]

        # The authority is the deterministic validator, so it has to agree
        # with the harness stop decision; with no completed trials it must
        # not authorize a stop.
        self.assertEqual(authorized, harness_should_stop)
        self.assertFalse(authorized)
|
||||
|
||||
def test_adaptive_replay_set_truncates_only_when_enabled(self) -> None:
|
||||
from types import SimpleNamespace
|
||||
|
||||
@@ -3956,6 +3974,56 @@ class CoreFlowTests(unittest.TestCase):
|
||||
state = store.load_state("study-1")
|
||||
self.assertEqual(state.next_trial_index, 1)
|
||||
|
||||
def test_cli_tune_vetoes_unauthorized_llm_stop(self) -> None:
    """Run ``study tune`` with an LLM that always asks to stop.

    The LLM proposal call is patched to return a payload with
    ``should_stop`` set, and ``run_trial`` is patched so no trial can
    execute. The test expects exit code 0, no ``run_trial`` call, at least
    one executed item flagged ``stop_vetoed``, and the last ``stopped``
    item to carry ``stop_authorized_by == "llm_after_veto_budget"``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        study_path = _write_study_assets(root)

        # Point the study at a custom LLM endpoint before running the CLI.
        spec = json.loads(study_path.read_text(encoding="utf-8"))
        endpoint = {
            "provider": "custom",
            "base_url": "http://localhost:9/v1",
            "model": "test-model",
            "api_key_env": "AITUNER_TEST_KEY",
        }
        spec["llm"]["endpoint"] = endpoint
        study_path.write_text(json.dumps(spec), encoding="utf-8")

        store_root = root / "store"

        # Proposal the patched LLM call returns on every invocation.
        proposal = {
            "observation": "looks done",
            "diagnosis": "agent thinks it converged",
            "config_patch": {"env_patch": {}, "flag_patch": {}},
            "expected_effects": ["stop"],
            "why_not_previous_failures": "n/a",
            "should_stop": True,
        }
        stop_payload = json.dumps(proposal)

        argv = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
        ]

        captured = io.StringIO()
        with mock.patch("aituner.cli.run_trial") as run_trial_mock:
            with mock.patch(
                "aituner.cli.call_llm_for_proposal",
                return_value=stop_payload,
            ):
                with contextlib.redirect_stdout(captured):
                    exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        run_trial_mock.assert_not_called()

        report = json.loads(captured.getvalue())
        executed = report["executed_trials"]

        # The first unauthorized LLM stop is vetoed; the second is honored
        # only after the veto budget is spent.
        self.assertTrue(any(entry.get("stop_vetoed") for entry in executed))
        honored = [entry for entry in executed if entry.get("stopped")]
        self.assertTrue(honored)
        final_stop = honored[-1]
        self.assertEqual(
            final_stop["stop_authorized_by"], "llm_after_veto_budget"
        )
|
||||
|
||||
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user