Gate GMU climb on measured improvement

This commit is contained in:
2026-06-29 02:00:41 +08:00
parent ee101a7c24
commit 6b25d56c1f
3 changed files with 39 additions and 14 deletions

View File

@@ -2594,10 +2594,9 @@ class CoreFlowTests(unittest.TestCase):
)
self.assertNotIn("tensor-parallel-size", proposal.config_patch.flag_patch)
def test_harness_continues_gpu_mem_util_after_tied_same_topology_probe(self) -> None:
"""After adjacent topology validation, gpu-memory-utilization should hill-climb
on the incumbent topology even if an earlier gmu step tied the incumbent and
did not become state.best_trial_id."""
def test_harness_stops_gpu_mem_util_climb_after_tied_same_topology_probe(self) -> None:
"""A same-topology gpu-memory-utilization probe must improve per-GPU rate before
the hill-climb continues; launch success alone is not evidence to keep climbing."""
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
study_path = _write_study_assets(
@@ -2711,11 +2710,14 @@ class CoreFlowTests(unittest.TestCase):
window_summary={"prompt_tokens_p95": 1500},
state=state,
)
proposal = build_harness_guided_proposal(context)
self.assertIsNotNone(proposal)
self.assertEqual(
proposal.config_patch.flag_patch,
candidates = context["experiment_plan"]["candidate_actions"]
self.assertNotIn(
{"tensor-parallel-size": 2, "gpu-memory-utilization": 0.94},
[
item["config_patch"]["flag_patch"]
for item in candidates
if item["knob_family"] == "gpu-memory-utilization"
],
)
def test_harness_validates_unmeasured_tp_frontier_before_runtime_refinement(self) -> None: