Add Stop-B authority: deterministic validator overrides LLM stop

Phase 4 of the two-stop work. The harness already pre-empts the LLM with deterministic stops and guided probes, but an LLM-originated `should_stop` could still end the loop while the validator saw remaining opportunity. Add `harness._stop_authority`, exposed as `context["stop_authority"]`, whose `authorized` mirrors the deterministic harness stop decision and whose `opportunity_remains` flags an open topology frontier or a high-value planned candidate. In `study tune`, an LLM-originated `should_stop` is now honored only when the validator authorizes it or once the bounded veto budget is spent; until then an unauthorized stop is vetoed, so the loop cannot converge prematurely on the agent's say-so. File- and harness-originated stops are unaffected, and the stop reason chain is recorded.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:45:14 +08:00
parent 51a9e4a007
commit a8f903498d
3 changed files with 150 additions and 3 deletions
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import json
+import contextlib
 import io
 import math
 import os
@@ -418,6 +419,23 @@ class CoreFlowTests(unittest.TestCase):
        self.assertTrue(early)
        self.assertTrue(any(c["family_similarity"]["C"] < 0.9 for c in early))

+    def test_stop_authority_mirrors_validator_and_blocks_fresh_stop(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            study = load_study_spec(_write_study_assets(Path(tmp)))
+            state = StudyState(study_id=study.study_id, trials=[])
+            context = build_harness_context(
+                study=study,
+                window_summary={},
+                state=state,
+            )
+            authority = context["stop_authority"]
+            # The authority is the deterministic validator; with no completed
+            # trials it must not authorize a stop.
+            self.assertEqual(
+                authority["authorized"], context["harness_stop"]["should_stop"]
+            )
+            self.assertFalse(authority["authorized"])
+
    def test_adaptive_replay_set_truncates_only_when_enabled(self) -> None:
        from types import SimpleNamespace

@@ -3956,6 +3974,56 @@ class CoreFlowTests(unittest.TestCase):
            state = store.load_state("study-1")
            self.assertEqual(state.next_trial_index, 1)

+    def test_cli_tune_vetoes_unauthorized_llm_stop(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(tmp_path)
+            spec = json.loads(study_path.read_text(encoding="utf-8"))
+            spec["llm"]["endpoint"] = {
+                "provider": "custom",
+                "base_url": "http://localhost:9/v1",
+                "model": "test-model",
+                "api_key_env": "AITUNER_TEST_KEY",
+            }
+            study_path.write_text(json.dumps(spec), encoding="utf-8")
+            store_root = tmp_path / "store"
+            stop_payload = json.dumps(
+                {
+                    "observation": "looks done",
+                    "diagnosis": "agent thinks it converged",
+                    "config_patch": {"env_patch": {}, "flag_patch": {}},
+                    "expected_effects": ["stop"],
+                    "why_not_previous_failures": "n/a",
+                    "should_stop": True,
+                }
+            )
+            buffer = io.StringIO()
+            with mock.patch("aituner.cli.run_trial") as run_trial_mock, mock.patch(
+                "aituner.cli.call_llm_for_proposal", return_value=stop_payload
+            ), contextlib.redirect_stdout(buffer):
+                exit_code = cli_main(
+                    [
+                        "study",
+                        "tune",
+                        "--spec",
+                        str(study_path),
+                        "--store-root",
+                        str(store_root),
+                        "--skip-baseline",
+                        "--max-trials",
+                        "2",
+                    ]
+                )
+            self.assertEqual(exit_code, 0)
+            run_trial_mock.assert_not_called()
+            executed = json.loads(buffer.getvalue())["executed_trials"]
+            # The first unauthorized LLM stop is vetoed; the second is honored
+            # only after the veto budget is spent.
+            self.assertTrue(any(item.get("stop_vetoed") for item in executed))
+            honored = [item for item in executed if item.get("stopped")]
+            self.assertTrue(honored)
+            self.assertEqual(honored[-1]["stop_authorized_by"], "llm_after_veto_budget")
+
    def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)