From f653af09a871ae7993eed2696d1f3528536cb342 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Wed, 6 May 2026 17:59:09 +0800 Subject: [PATCH] Stop harness when feasible probe reaches search high --- src/aituner/harness.py | 16 ++--------- tests/test_core_flow.py | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/aituner/harness.py b/src/aituner/harness.py index 7eaf033..ee274c7 100644 --- a/src/aituner/harness.py +++ b/src/aituner/harness.py @@ -798,10 +798,6 @@ def _search_high_saturation_guard( float(study.search.tolerance), (float(study.search.high) - float(study.search.low)) / float(2 ** max(study.search.max_probes, 1)), ) - latency_summary = last_probe.get("latency_summary") - failed = latency_summary.get("failed_reason_counts") if isinstance(latency_summary, dict) else {} - if not isinstance(failed, dict): - failed = {} if not last_probe.get("feasible"): return { **default, @@ -817,20 +813,12 @@ def _search_high_saturation_guard( "threshold_gap_to_high": threshold_gap, "binary_probe_resolution": binary_probe_resolution, } - if failed: - return { - **default, - "reason": "incumbent_high_probe_has_slo_failures", - "last_threshold": last_threshold, - "threshold_gap_to_high": threshold_gap, - "failed_reason_counts": failed, - } return { "saturated": True, "reason": "search_high_saturated_by_incumbent", "summary": ( - "The incumbent's highest measured probe is feasible, has no SLO failures, " - "and is within the configured binary-search resolution of search.high." + "The incumbent's highest measured probe is feasible and is within " + "the configured binary-search resolution of search.high." ), "incumbent_trial_id": state.best_trial_id, "search_high": study.search.high, diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index 5b267e1..5b10457 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -600,6 +600,68 @@ class CoreFlowTests(unittest.TestCase): self.assertIsNotNone(proposal) self.assertTrue(proposal.should_stop) + def test_harness_stop_allows_feasible_high_probe_with_some_failures(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + study_path = _write_study_assets(tmp_path) + study = load_study_spec(study_path) + result_path = tmp_path / "trial-0004.json" + result_path.write_text( + json.dumps( + { + "status": "completed", + "best_sampling_u": 0.99609375, + "best_request_rate": 1.77, + "best_pass_rate": 0.968, + "probes": [ + { + "threshold": 0.99609375, + "feasible": True, + "payload": { + "request_count": 1063, + "pass_rate": 0.968, + "request_rate": 1.77, + "early_stopped": False, + "early_stop_reason": "", + "latency_summary": { + "failed_reason_counts": { + "tpot_ms>50.0": 34, + } + }, + }, + } + ], + } + ), + encoding="utf-8", + ) + state = StudyState( + study_id=study.study_id, + best_trial_id="trial-0004", + best_request_rate=1.77, + best_request_rate_per_gpu=0.4425, + trials=[ + TrialSummary( + trial_id="trial-0004", + status="completed", + best_request_rate=1.77, + best_request_rate_per_gpu=0.4425, + result_path=str(result_path), + config_patch={ + "env_patch": {}, + "flag_patch": {"tensor-parallel-size": 4}, + }, + ) + ], + ) + context = build_harness_context( + study=study, + window_summary={"prompt_tokens_p95": 2048}, + state=state, + ) + self.assertTrue(context["harness_stop"]["should_stop"]) + self.assertEqual(context["harness_stop"]["reason"], "search_high_saturated_by_incumbent") + def test_harness_guided_first_tp_probe_for_latency_bottleneck(self) -> None: with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp)