Add infeasible plateau guard to harness

This commit is contained in:
2026-04-25 18:47:32 +08:00
parent 6c04b9dbbc
commit e188de7735
3 changed files with 320 additions and 8 deletions

View File

@@ -13,6 +13,7 @@ from aituner.compare import load_compare_spec, run_compare
from aituner.engine import build_launch_recipe
from aituner.http_client import _auth_headers, _openai_url, _should_bypass_proxy
from aituner.job import append_job, build_trial_job
from aituner.harness import build_harness_context
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -226,6 +227,143 @@ class CoreFlowTests(unittest.TestCase):
self.assertIn("knob_harnesses", prompt)
self.assertTrue(study_root.exists())
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
    """Latency failure counts should drive the diagnosed bottleneck.

    Even when the probe early-stops with the generic
    "slo_pass_rate_unrecoverable" reason, the harness context is expected
    to classify the dominant failure mode (TTFT breaches here) as the
    active bottleneck instead of falling back to the generic reason.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        study = load_study_spec(_write_study_assets(workdir))

        # Single infeasible probe whose failures are dominated by TTFT
        # breaches (70 vs. 5 TPOT breaches).
        probe_payload = {
            "request_count": 100,
            "pass_rate": 0.3,
            "request_rate": 1.0,
            "early_stopped": True,
            "early_stop_reason": "slo_pass_rate_unrecoverable",
            "latency_summary": {
                "failed_reason_counts": {
                    "ttft_ms>5000.0": 70,
                    "tpot_ms>50.0": 5,
                },
                "ttft_ms": {"p95": 6500.0, "p99": 7200.0},
            },
        }
        trial_result = {
            "status": "completed",
            "probes": [
                {
                    "threshold": 0.25,
                    "feasible": False,
                    "payload": probe_payload,
                }
            ],
        }
        result_file = workdir / "trial-result.json"
        result_file.write_text(json.dumps(trial_result), encoding="utf-8")

        summary = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            result_path=str(result_file),
            config_patch={"env_patch": {}, "flag_patch": {}},
        )
        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 5000,
                "prompt_tail_ratio_p95_p50": 3.0,
            },
            state=StudyState(study_id=study.study_id, trials=[summary]),
        )

        diagnostics = context["recent_trial_diagnostics"]
        self.assertEqual(diagnostics[0]["active_bottleneck"], "ttft_prefill")
def test_harness_blocks_repeating_infeasible_plateau_family(self) -> None:
    """Two infeasible trials varying only DP size should trip the guard.

    Both trials plateau at the same pass rate with near-identical TTFT
    p95, differing only in ``data-parallel-size``; the convergence guard
    must flag the plateau, name that flag family as blocked, and signal
    that the study should stop if no harness justifies an adjacent probe.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        study = load_study_spec(
            _write_study_assets(
                workdir,
                engine_overrides={
                    "tunable_flags": [
                        "tensor-parallel-size",
                        "data-parallel-size",
                        "expert-parallel-size",
                    ],
                    "topology_constraints": {
                        "allowed_tensor_parallel_sizes": [1, 2, 4],
                        "allowed_data_parallel_sizes": [1, 2, 4, 8],
                        "allowed_expert_parallel_sizes": [1],
                        "allowed_tp_dp_products": [1, 2, 4, 8],
                    },
                },
            )
        )

        # (dp size, pass rate, ttft p95): same pass rate, ~same latency —
        # a textbook no-progress plateau along the DP axis.
        plateau_points = [(4, 0.345, 3818.4), (8, 0.345, 3823.4)]
        summaries = []
        for trial_index, (dp_size, plateau_rate, ttft_p95) in enumerate(
            plateau_points, start=3
        ):
            result_file = workdir / f"trial-{trial_index:04d}.json"
            result_file.write_text(
                json.dumps(
                    {
                        "status": "completed",
                        "best_request_rate": None,
                        "all_infeasible_diagnostics": {
                            "threshold": 0.0078125,
                            "request_count": 148,
                            "request_rate": 0.22,
                            "pass_rate": plateau_rate,
                            "early_stopped": True,
                            "early_stop_reason": "elapsed",
                            "latency_summary": {
                                "failed_reason_counts": {"ttft_ms>5000.0": 97},
                                "ttft_ms": {"p95": ttft_p95, "p99": 5800.0},
                            },
                        },
                    }
                ),
                encoding="utf-8",
            )
            summaries.append(
                TrialSummary(
                    trial_id=f"trial-{trial_index:04d}",
                    status="completed",
                    result_path=str(result_file),
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {
                            "tensor-parallel-size": 1,
                            "data-parallel-size": dp_size,
                            "expert-parallel-size": 1,
                        },
                    },
                )
            )

        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 7628,
                "prompt_tail_ratio_p95_p50": 3.83,
            },
            state=StudyState(study_id=study.study_id, trials=summaries),
        )

        guard = context["convergence_guard"]["infeasible_progress"]
        self.assertTrue(guard["plateau_detected"])
        self.assertTrue(guard["stop_if_next_probe_repeats_family"])
        self.assertEqual(guard["blocked_primary_family"], "data-parallel-size")
        self.assertTrue(
            context["convergence_guard"][
                "should_stop_if_no_harness_can_justify_a_new_adjacent_probe"
            ]
        )
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)