Harness stop uses full state baseline

This commit is contained in:
2026-06-20 22:48:27 +08:00
parent a9d237bbfd
commit 426151bc9f
2 changed files with 139 additions and 18 deletions

View File

@@ -1068,6 +1068,119 @@ class CoreFlowTests(unittest.TestCase):
self.assertTrue(context["harness_stop"]["should_stop"])
self.assertEqual(context["harness_stop"]["reason"], "post_incumbent_validation_exhausted")
def test_harness_validation_uses_full_state_baseline_when_history_window_moves(self) -> None:
    """Check the harness stop decision on a twelve-trial study state.

    The state records trial-0006 as the incumbent (request rate 2.4) while
    the first trial, which has an empty flag patch, reports 0.8.  Trials
    0007-0012 carry no request-rate fields and trial-0011 is marked failed.
    The harness context is expected to request a stop with reason
    ``post_incumbent_validation_exhausted`` and to report an
    ``incumbent_gain_vs_baseline`` evidence value above 2.9.

    NOTE(review): going by the test name, the intent is that the baseline is
    taken from the full state rather than a moving history window -- the
    window logic itself is not visible from this test.
    """
    # Columns: trial id, status, max-num-seqs override (None -> empty flag
    # patch), best request rate, best request rate per GPU.  A rate of None
    # means the rate keywords are not passed to TrialSummary at all.
    trial_rows = [
        ("trial-0001", "completed", None, 0.8, 0.1),
        ("trial-0002", "completed", 16, 0.88, 0.11),
        ("trial-0003", "completed", 24, 0.96, 0.12),
        ("trial-0004", "completed", 32, 1.04, 0.13),
        ("trial-0005", "completed", 40, 2.24, 0.28),
        ("trial-0006", "completed", 48, 2.4, 0.3),
        ("trial-0007", "completed", 56, None, None),
        ("trial-0008", "completed", 64, None, None),
        ("trial-0009", "completed", 72, None, None),
        ("trial-0010", "completed", 80, None, None),
        ("trial-0011", "failed", 88, None, None),
        ("trial-0012", "completed", 96, None, None),
    ]

    with tempfile.TemporaryDirectory() as tmp:
        workspace = Path(tmp)
        spec_path = _write_study_assets(
            workspace,
            engine_overrides={"tunable_flags": ["max-num-seqs"]},
        )
        study = load_study_spec(spec_path)

        summaries = []
        for trial_id, status, max_num_seqs, rate, rate_per_gpu in trial_rows:
            flag_patch = {} if max_num_seqs is None else {"max-num-seqs": max_num_seqs}
            fields = {
                "trial_id": trial_id,
                "status": status,
                "parallel_size": 8,
                "config_patch": {"env_patch": {}, "flag_patch": flag_patch},
            }
            if rate is not None:
                # Only the first six trials supply measured rates.
                fields["best_request_rate"] = rate
                fields["best_request_rate_per_gpu"] = rate_per_gpu
            summaries.append(TrialSummary(**fields))

        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0006",
            best_parallel_size=8,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            trials=summaries,
        )
        context = build_harness_context(
            study=study,
            window_summary={"prompt_tokens_p95": 2048},
            state=state,
        )

        stop_decision = context["harness_stop"]
        self.assertTrue(stop_decision["should_stop"])
        self.assertEqual(stop_decision["reason"], "post_incumbent_validation_exhausted")
        self.assertGreater(
            stop_decision["evidence"]["incumbent_gain_vs_baseline"],
            2.9,
        )
def test_harness_does_not_stop_immediately_after_strong_incumbent(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)