Add infeasible plateau guard to harness
This commit is contained in:
@@ -13,6 +13,7 @@ from aituner.compare import load_compare_spec, run_compare
|
||||
from aituner.engine import build_launch_recipe
|
||||
from aituner.http_client import _auth_headers, _openai_url, _should_bypass_proxy
|
||||
from aituner.job import append_job, build_trial_job
|
||||
from aituner.harness import build_harness_context
|
||||
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
|
||||
from aituner.search import ThresholdProbe, binary_search_max_feasible
|
||||
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
|
||||
@@ -226,6 +227,143 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("knob_harnesses", prompt)
|
||||
self.assertTrue(study_root.exists())
|
||||
|
||||
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
    """Latency failure breakdowns should determine the reported bottleneck.

    The probe below carries both a generic ``slo_pass_rate_unrecoverable``
    early-stop reason and a latency summary dominated by TTFT failures
    (70 TTFT vs 5 TPOT). The harness context is expected to prefer the
    latency evidence and label the bottleneck ``ttft_prefill``.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study = load_study_spec(_write_study_assets(tmp_path))

        # Single infeasible probe whose failure counts point at prefill.
        probe_payload = {
            "request_count": 100,
            "pass_rate": 0.3,
            "request_rate": 1.0,
            "early_stopped": True,
            "early_stop_reason": "slo_pass_rate_unrecoverable",
            "latency_summary": {
                "failed_reason_counts": {
                    "ttft_ms>5000.0": 70,
                    "tpot_ms>50.0": 5,
                },
                "ttft_ms": {"p95": 6500.0, "p99": 7200.0},
            },
        }
        result_path = tmp_path / "trial-result.json"
        result_path.write_text(
            json.dumps(
                {
                    "status": "completed",
                    "probes": [
                        {
                            "threshold": 0.25,
                            "feasible": False,
                            "payload": probe_payload,
                        }
                    ],
                }
            ),
            encoding="utf-8",
        )

        completed_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            result_path=str(result_path),
            config_patch={"env_patch": {}, "flag_patch": {}},
        )
        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 5000,
                "prompt_tail_ratio_p95_p50": 3.0,
            },
            state=StudyState(study_id=study.study_id, trials=[completed_trial]),
        )

        # TTFT failures dominate, so diagnostics must name prefill, not the
        # generic unrecoverable reason.
        self.assertEqual(
            context["recent_trial_diagnostics"][0]["active_bottleneck"],
            "ttft_prefill",
        )
|
||||
|
||||
def test_harness_blocks_repeating_infeasible_plateau_family(self) -> None:
    """A repeated infeasible plateau on one flag family should be blocked.

    Two consecutive all-infeasible trials vary only ``data-parallel-size``
    (4 then 8) with essentially identical pass rates and TTFT p95. The
    convergence guard must detect the plateau, block further probes on the
    ``data-parallel-size`` family, and recommend stopping when no harness
    can justify a new adjacent probe.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(
            tmp_path,
            engine_overrides={
                "tunable_flags": [
                    "tensor-parallel-size",
                    "data-parallel-size",
                    "expert-parallel-size",
                ],
                "topology_constraints": {
                    "allowed_tensor_parallel_sizes": [1, 2, 4],
                    "allowed_data_parallel_sizes": [1, 2, 4, 8],
                    "allowed_expert_parallel_sizes": [1],
                    "allowed_tp_dp_products": [1, 2, 4, 8],
                },
            },
        )
        study = load_study_spec(study_path)

        def _write_result(path: Path, pass_rate: float, ttft_p95: float) -> None:
            # All-infeasible trial result: no feasible rate, diagnostics only.
            path.write_text(
                json.dumps(
                    {
                        "status": "completed",
                        "best_request_rate": None,
                        "all_infeasible_diagnostics": {
                            "threshold": 0.0078125,
                            "request_count": 148,
                            "request_rate": 0.22,
                            "pass_rate": pass_rate,
                            "early_stopped": True,
                            "early_stop_reason": "elapsed",
                            "latency_summary": {
                                "failed_reason_counts": {"ttft_ms>5000.0": 97},
                                "ttft_ms": {"p95": ttft_p95, "p99": 5800.0},
                            },
                        },
                    }
                ),
                encoding="utf-8",
            )

        plateau_points = [(4, 0.345, 3818.4), (8, 0.345, 3823.4)]
        trial_summaries = []
        for index, (dp, pass_rate, p95) in enumerate(plateau_points, start=3):
            result_path = tmp_path / f"trial-{index:04d}.json"
            _write_result(result_path, pass_rate, p95)
            trial_summaries.append(
                TrialSummary(
                    trial_id=f"trial-{index:04d}",
                    status="completed",
                    result_path=str(result_path),
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {
                            "tensor-parallel-size": 1,
                            "data-parallel-size": dp,
                            "expert-parallel-size": 1,
                        },
                    },
                )
            )

        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 7628,
                "prompt_tail_ratio_p95_p50": 3.83,
            },
            state=StudyState(study_id=study.study_id, trials=trial_summaries),
        )

        guard = context["convergence_guard"]["infeasible_progress"]
        self.assertTrue(guard["plateau_detected"])
        self.assertTrue(guard["stop_if_next_probe_repeats_family"])
        self.assertEqual(guard["blocked_primary_family"], "data-parallel-size")
        self.assertTrue(
            context["convergence_guard"][
                "should_stop_if_no_harness_can_justify_a_new_adjacent_probe"
            ]
        )
|
||||
|
||||
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user