Add Stop-B authority: deterministic validator overrides LLM stop
Phase 4 of the two-stop work.

The harness already pre-empts the LLM with deterministic stops and guided probes, but an LLM-originated `should_stop` could still end the loop while the validator saw remaining opportunity.

Add `harness._stop_authority`, exposed as `context["stop_authority"]`. Its `authorized` field mirrors the deterministic harness stop decision, and its `opportunity_remains` field flags an open topology frontier or a high-value planned candidate.

In `study tune`, an LLM-originated `should_stop` is now honored only when the validator authorizes it. An unauthorized stop is vetoed, up to a bounded veto budget; once that budget is spent the stop is honored and recorded as `llm_after_veto_budget`, so the loop cannot converge prematurely on the agent's say-so alone. File- and harness-originated stops are unaffected, and the stop reason chain is recorded.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -226,6 +226,8 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
|
||||
if proposal_files and max_trials > len(proposal_files):
|
||||
max_trials = len(proposal_files)
|
||||
executed: list[dict[str, object]] = []
|
||||
stop_vetoes = 0
|
||||
max_llm_stop_vetoes = 1
|
||||
for idx in range(max_trials):
|
||||
state = store.load_state(study.study_id)
|
||||
if state.tuning_stop_reason:
|
||||
@@ -334,7 +336,34 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
|
||||
proposal = parse_proposal_text(proposal_text, study)
|
||||
store.write_proposal(study.study_id, proposal_name, proposal)
|
||||
if proposal.should_stop:
|
||||
if proposal_name.startswith("harness-stop-"):
|
||||
is_harness_stop = proposal_name.startswith("harness-stop-")
|
||||
is_llm_stop = not is_harness_stop and proposal_source is None
|
||||
stop_authority = (
|
||||
harness_context.get("stop_authority")
|
||||
if isinstance(harness_context, dict)
|
||||
else None
|
||||
)
|
||||
authorized = stop_authority is None or bool(stop_authority.get("authorized"))
|
||||
# Stop-B authority: the deterministic validator overrides an
|
||||
# LLM-originated stop. Veto an unauthorized stop (bounded) so the
|
||||
# loop does not converge prematurely on the agent's say-so alone.
|
||||
if is_llm_stop and not authorized and stop_vetoes < max_llm_stop_vetoes:
|
||||
stop_vetoes += 1
|
||||
executed.append(
|
||||
{
|
||||
"trial_id": None,
|
||||
"proposal_name": proposal_name,
|
||||
"proposal_source": "llm",
|
||||
"stop_vetoed": True,
|
||||
"reason": "validator_did_not_authorize_stop",
|
||||
"validator_reason": (
|
||||
stop_authority.get("reason") if stop_authority else None
|
||||
),
|
||||
"diagnosis": proposal.diagnosis,
|
||||
}
|
||||
)
|
||||
continue
|
||||
if is_harness_stop:
|
||||
proposal_source_label = "harness"
|
||||
else:
|
||||
proposal_source_label = str(proposal_source) if proposal_source else "llm"
|
||||
@@ -344,6 +373,11 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
|
||||
"proposal_name": proposal_name,
|
||||
"proposal_source": proposal_source_label,
|
||||
"stopped": True,
|
||||
"stop_authorized_by": (
|
||||
"validator"
|
||||
if (is_harness_stop or authorized)
|
||||
else "llm_after_veto_budget"
|
||||
),
|
||||
"diagnosis": proposal.diagnosis,
|
||||
"state_best_trial_id": state.best_trial_id,
|
||||
"state_best_request_rate": state.best_request_rate,
|
||||
|
||||
@@ -48,6 +48,12 @@ def build_harness_context(
|
||||
trial_profiles,
|
||||
bottleneck_hypotheses,
|
||||
)
|
||||
harness_stop = _harness_stop_decision(
|
||||
study,
|
||||
state,
|
||||
recent_diagnostics,
|
||||
experiment_plan=experiment_plan,
|
||||
)
|
||||
return {
|
||||
"paper_alignment": {
|
||||
"goal": "Use workload-feature-to-knob harnesses to reduce wasted trials and avoid regressing after a good configuration is found.",
|
||||
@@ -61,11 +67,13 @@ def build_harness_context(
|
||||
"candidate_actions": experiment_plan["candidate_actions"],
|
||||
"experiment_plan": experiment_plan,
|
||||
"convergence_guard": _convergence_guard(state, recent_diagnostics),
|
||||
"harness_stop": _harness_stop_decision(
|
||||
"harness_stop": harness_stop,
|
||||
"stop_authority": _stop_authority(
|
||||
study,
|
||||
state,
|
||||
recent_diagnostics,
|
||||
experiment_plan=experiment_plan,
|
||||
experiment_plan,
|
||||
harness_stop,
|
||||
),
|
||||
"harness_proposal": _harness_proposal_decision(
|
||||
study,
|
||||
@@ -808,6 +816,43 @@ def _harness_stop_decision(
|
||||
}
|
||||
|
||||
|
||||
def _stop_authority(
    study: StudySpec,
    state: StudyState,
    recent_diagnostics: list[dict[str, Any]],
    experiment_plan: dict[str, Any] | None,
    harness_stop: dict[str, Any],
) -> dict[str, Any]:
    """Build the Stop-B authority record from the deterministic stop decision.

    The record tells the tuning loop whether the deterministic validator, and
    not the LLM, considers stopping justified.

    Args:
        study: Study specification, forwarded to the topology frontier check.
        state: Current study state, forwarded to the topology frontier check.
        recent_diagnostics: Recent diagnostics, forwarded to the topology
            frontier check.
        experiment_plan: Planned experiment, or ``None``. Only its
            ``"next_action"`` entry is read, and only when the plan is a dict.
        harness_stop: The already-computed harness stop decision; its
            ``"should_stop"`` and ``"reason"`` entries are read.

    Returns:
        A dict with four keys:

        * ``"authorized"`` -- truthiness of ``harness_stop["should_stop"]``.
        * ``"reason"`` -- ``harness_stop["reason"]`` passed through unchanged.
        * ``"opportunity_remains"`` -- true when the topology frontier reports
          ``"frontier_open"`` or the planned next action scores at least 0.35.
        * ``"summary"`` -- a fixed human-readable sentence chosen by
          ``"authorized"``.

    Note that ``"opportunity_remains"`` is informational here: it does not
    feed into ``"authorized"``.
    """
    frontier_status = _topology_frontier_status(study, state, recent_diagnostics)

    # The plan may be absent; treat anything that is not a dict as "no plan".
    planned_action = None
    if isinstance(experiment_plan, dict):
        planned_action = experiment_plan.get("next_action")

    # A planned action counts as a live candidate only at score >= 0.35.
    high_value_candidate = False
    if isinstance(planned_action, dict):
        high_value_candidate = _as_float(planned_action.get("score")) >= 0.35

    frontier_is_open = bool(frontier_status.get("frontier_open"))
    if frontier_is_open:
        opportunity_remains = True
    else:
        opportunity_remains = high_value_candidate

    # Authorization is a straight mirror of the harness stop decision.
    if harness_stop.get("should_stop"):
        authorized = True
        summary = (
            "Deterministic validator authorizes stop; no adjacent bottleneck probe remains."
        )
    else:
        authorized = False
        summary = "Validator does not authorize stop; LLM should_stop is advisory only."

    return {
        "authorized": authorized,
        "reason": harness_stop.get("reason"),
        "opportunity_remains": opportunity_remains,
        "summary": summary,
    }
|
||||
|
||||
|
||||
def _harness_proposal_decision(
|
||||
study: StudySpec,
|
||||
window_summary: dict[str, Any],
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import contextlib
|
||||
import io
|
||||
import math
|
||||
import os
|
||||
@@ -418,6 +419,23 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertTrue(early)
|
||||
self.assertTrue(any(c["family_similarity"]["C"] < 0.9 for c in early))
|
||||
|
||||
def test_stop_authority_mirrors_validator_and_blocks_fresh_stop(self) -> None:
    """A fresh study (no trials) yields a stop authority that refuses to stop.

    Also checks that ``stop_authority["authorized"]`` equals the
    ``harness_stop["should_stop"]`` value from the same harness context.
    """
    with tempfile.TemporaryDirectory() as workdir:
        spec = load_study_spec(_write_study_assets(Path(workdir)))
        empty_state = StudyState(study_id=spec.study_id, trials=[])
        harness_context = build_harness_context(
            study=spec,
            window_summary={},
            state=empty_state,
        )
        stop_authority = harness_context["stop_authority"]
        harness_should_stop = harness_context["harness_stop"]["should_stop"]

        # The authority must agree with the deterministic harness decision ...
        self.assertEqual(stop_authority["authorized"], harness_should_stop)
        # ... and with zero completed trials it must not authorize a stop.
        self.assertFalse(stop_authority["authorized"])
|
||||
|
||||
def test_adaptive_replay_set_truncates_only_when_enabled(self) -> None:
|
||||
from types import SimpleNamespace
|
||||
|
||||
@@ -3956,6 +3974,56 @@ class CoreFlowTests(unittest.TestCase):
|
||||
state = store.load_state("study-1")
|
||||
self.assertEqual(state.next_trial_index, 1)
|
||||
|
||||
def test_cli_tune_vetoes_unauthorized_llm_stop(self) -> None:
    """`study tune` vetoes an LLM stop the validator has not authorized.

    The mocked LLM always answers with ``should_stop: true``. With
    ``--max-trials 2`` the run must record a vetoed stop and must end on a
    stop labelled ``llm_after_veto_budget``, without ever running a trial.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        study_path = _write_study_assets(root)

        # Point the study at a custom LLM endpoint; the LLM call is mocked below.
        spec = json.loads(study_path.read_text(encoding="utf-8"))
        spec["llm"]["endpoint"] = {
            "provider": "custom",
            "base_url": "http://localhost:9/v1",
            "model": "test-model",
            "api_key_env": "AITUNER_TEST_KEY",
        }
        study_path.write_text(json.dumps(spec), encoding="utf-8")
        store_root = root / "store"

        # Every LLM reply asks to stop.
        stop_proposal = {
            "observation": "looks done",
            "diagnosis": "agent thinks it converged",
            "config_patch": {"env_patch": {}, "flag_patch": {}},
            "expected_effects": ["stop"],
            "why_not_previous_failures": "n/a",
            "should_stop": True,
        }
        stop_payload = json.dumps(stop_proposal)

        argv = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
        ]

        captured = io.StringIO()
        with contextlib.ExitStack() as stack:
            run_trial_mock = stack.enter_context(
                mock.patch("aituner.cli.run_trial")
            )
            stack.enter_context(
                mock.patch(
                    "aituner.cli.call_llm_for_proposal",
                    return_value=stop_payload,
                )
            )
            stack.enter_context(contextlib.redirect_stdout(captured))
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        run_trial_mock.assert_not_called()

        executed = json.loads(captured.getvalue())["executed_trials"]
        # The first unauthorized LLM stop is vetoed; the second is honored
        # only once the veto budget has been spent.
        self.assertTrue(any(entry.get("stop_vetoed") for entry in executed))
        honored = [entry for entry in executed if entry.get("stopped")]
        self.assertTrue(honored)
        self.assertEqual(honored[-1]["stop_authorized_by"], "llm_after_veto_budget")
|
||||
|
||||
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user