Add infeasible plateau guard to harness
This commit is contained in:
@@ -13,6 +13,7 @@ from aituner.compare import load_compare_spec, run_compare
|
||||
from aituner.engine import build_launch_recipe
|
||||
from aituner.http_client import _auth_headers, _openai_url, _should_bypass_proxy
|
||||
from aituner.job import append_job, build_trial_job
|
||||
from aituner.harness import build_harness_context
|
||||
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
|
||||
from aituner.search import ThresholdProbe, binary_search_max_feasible
|
||||
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
|
||||
@@ -226,6 +227,143 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("knob_harnesses", prompt)
|
||||
self.assertTrue(study_root.exists())
|
||||
|
||||
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
    """Latency failure breakdowns should determine the reported bottleneck.

    The probe below carries both a generic ``slo_pass_rate_unrecoverable``
    early-stop reason and a latency summary dominated by TTFT failures
    (70 TTFT vs 5 TPOT). The harness context is expected to prefer the
    latency evidence and label the bottleneck ``ttft_prefill``.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study = load_study_spec(_write_study_assets(tmp_path))

        # Single infeasible probe whose failure counts point at prefill.
        probe_payload = {
            "request_count": 100,
            "pass_rate": 0.3,
            "request_rate": 1.0,
            "early_stopped": True,
            "early_stop_reason": "slo_pass_rate_unrecoverable",
            "latency_summary": {
                "failed_reason_counts": {
                    "ttft_ms>5000.0": 70,
                    "tpot_ms>50.0": 5,
                },
                "ttft_ms": {"p95": 6500.0, "p99": 7200.0},
            },
        }
        result_path = tmp_path / "trial-result.json"
        result_path.write_text(
            json.dumps(
                {
                    "status": "completed",
                    "probes": [
                        {
                            "threshold": 0.25,
                            "feasible": False,
                            "payload": probe_payload,
                        }
                    ],
                }
            ),
            encoding="utf-8",
        )

        completed_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            result_path=str(result_path),
            config_patch={"env_patch": {}, "flag_patch": {}},
        )
        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 5000,
                "prompt_tail_ratio_p95_p50": 3.0,
            },
            state=StudyState(study_id=study.study_id, trials=[completed_trial]),
        )

        # TTFT failures dominate, so diagnostics must name prefill, not the
        # generic unrecoverable reason.
        self.assertEqual(
            context["recent_trial_diagnostics"][0]["active_bottleneck"],
            "ttft_prefill",
        )
|
||||
|
||||
def test_harness_blocks_repeating_infeasible_plateau_family(self) -> None:
    """A repeated infeasible plateau on one flag family should be blocked.

    Two consecutive all-infeasible trials vary only ``data-parallel-size``
    (4 then 8) with essentially identical pass rates and TTFT p95. The
    convergence guard must detect the plateau, block further probes on the
    ``data-parallel-size`` family, and recommend stopping when no harness
    can justify a new adjacent probe.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(
            tmp_path,
            engine_overrides={
                "tunable_flags": [
                    "tensor-parallel-size",
                    "data-parallel-size",
                    "expert-parallel-size",
                ],
                "topology_constraints": {
                    "allowed_tensor_parallel_sizes": [1, 2, 4],
                    "allowed_data_parallel_sizes": [1, 2, 4, 8],
                    "allowed_expert_parallel_sizes": [1],
                    "allowed_tp_dp_products": [1, 2, 4, 8],
                },
            },
        )
        study = load_study_spec(study_path)

        def _write_result(path: Path, pass_rate: float, ttft_p95: float) -> None:
            # All-infeasible trial result: no feasible rate, diagnostics only.
            path.write_text(
                json.dumps(
                    {
                        "status": "completed",
                        "best_request_rate": None,
                        "all_infeasible_diagnostics": {
                            "threshold": 0.0078125,
                            "request_count": 148,
                            "request_rate": 0.22,
                            "pass_rate": pass_rate,
                            "early_stopped": True,
                            "early_stop_reason": "elapsed",
                            "latency_summary": {
                                "failed_reason_counts": {"ttft_ms>5000.0": 97},
                                "ttft_ms": {"p95": ttft_p95, "p99": 5800.0},
                            },
                        },
                    }
                ),
                encoding="utf-8",
            )

        plateau_points = [(4, 0.345, 3818.4), (8, 0.345, 3823.4)]
        trial_summaries = []
        for index, (dp, pass_rate, p95) in enumerate(plateau_points, start=3):
            result_path = tmp_path / f"trial-{index:04d}.json"
            _write_result(result_path, pass_rate, p95)
            trial_summaries.append(
                TrialSummary(
                    trial_id=f"trial-{index:04d}",
                    status="completed",
                    result_path=str(result_path),
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {
                            "tensor-parallel-size": 1,
                            "data-parallel-size": dp,
                            "expert-parallel-size": 1,
                        },
                    },
                )
            )

        context = build_harness_context(
            study=study,
            window_summary={
                "prompt_tokens_p95": 7628,
                "prompt_tail_ratio_p95_p50": 3.83,
            },
            state=StudyState(study_id=study.study_id, trials=trial_summaries),
        )

        guard = context["convergence_guard"]["infeasible_progress"]
        self.assertTrue(guard["plateau_detected"])
        self.assertTrue(guard["stop_if_next_probe_repeats_family"])
        self.assertEqual(guard["blocked_primary_family"], "data-parallel-size")
        self.assertTrue(
            context["convergence_guard"][
                "should_stop_if_no_harness_can_justify_a_new_adjacent_probe"
            ]
        )
|
||||
|
||||
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user