Stop tuning when baseline is infeasible

2026-05-08 01:07:36 +08:00
parent a7a5e9ad80
commit f212673f44
4 changed files with 170 additions and 0 deletions

View File

@@ -19,6 +19,43 @@ from .trace import load_trace_requests, summarize_window
from .worker import run_trial


def _is_empty_config_patch(proposal: Proposal) -> bool:
    return not proposal.config_patch.env_patch and not proposal.config_patch.flag_patch


def _baseline_all_infeasible_diagnosis(result: dict[str, object]) -> str | None:
    if result.get("status") != "completed":
        return None
    if isinstance(result.get("best_request_rate"), (int, float)):
        return None
    probes = result.get("probes")
    if not isinstance(probes, list) or not probes:
        return None
    if any(isinstance(probe, dict) and probe.get("feasible") for probe in probes):
        return None
    diagnostics = result.get("all_infeasible_diagnostics")
    if not isinstance(diagnostics, dict):
        diagnostics = {}
    lowest_rate = diagnostics.get("request_rate")
    lowest_threshold = diagnostics.get("threshold")
    pass_rate = diagnostics.get("pass_rate")
    early_stop_reason = str(diagnostics.get("early_stop_reason") or "").strip()
    pieces = [
        "Baseline configuration has no feasible probe under the current SLO.",
        "Stopping tuning because even the lowest sampled request rate did not meet the target pass rate.",
    ]
    if isinstance(lowest_rate, (int, float)):
        pieces.append(f"lowest_sampled_request_rate={float(lowest_rate):.6g}")
    if isinstance(lowest_threshold, (int, float)):
        pieces.append(f"lowest_sampling_u={float(lowest_threshold):.6g}")
    if isinstance(pass_rate, (int, float)):
        pieces.append(f"lowest_probe_pass_rate={float(pass_rate):.6g}")
    if early_stop_reason:
        pieces.append(f"early_stop_reason={early_stop_reason}")
    return " ".join(pieces)


def _study_source_path(study_root: Path) -> Path:
    return Path((study_root / "study_spec.source").read_text(encoding="utf-8").strip())
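For a sense of the string this helper builds, here is a minimal hand-written sketch; the `result` dict mirrors the shape the fake `run_trial` returns in the test at the bottom of this commit, with illustrative values:

# Minimal sketch: a completed result with no feasible probe.
result = {
    "status": "completed",
    "best_request_rate": None,
    "probes": [{"threshold": 0.25, "feasible": False}],
    "all_infeasible_diagnostics": {
        "threshold": 0.25,
        "request_rate": 1.0,
        "pass_rate": 0.5,
        "early_stop_reason": "slo_pass_rate_unrecoverable",
    },
}
print(_baseline_all_infeasible_diagnosis(result))
# One space-joined line:
# "Baseline configuration has no feasible probe under the current SLO.
#  Stopping tuning because even the lowest sampled request rate did not meet
#  the target pass rate. lowest_sampled_request_rate=1 lowest_sampling_u=0.25
#  lowest_probe_pass_rate=0.5 early_stop_reason=slo_pass_rate_unrecoverable"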
@@ -126,6 +163,18 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
    executed: list[dict[str, object]] = []
    for idx in range(max_trials):
        state = store.load_state(study.study_id)
        if state.tuning_stop_reason:
            executed.append(
                {
                    "trial_id": None,
                    "stopped": True,
                    "reason": state.tuning_stop_reason,
                    "diagnosis": state.tuning_stop_diagnosis,
                    "state_best_trial_id": state.best_trial_id,
                    "state_best_request_rate": state.best_request_rate,
                }
            )
            break
        if state.next_trial_index > max_trials:
            break
        window, requests = load_trace_requests(study, study_spec_path=spec_path)
@@ -228,6 +277,13 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
                }
            )
            break
        is_auto_baseline = (
            not proposal_files
            and not args.skip_baseline
            and state.next_trial_index == 1
            and not state.trials
            and _is_empty_config_patch(proposal)
        )
        trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal)
        trial_spec_path = Path(trial.artifact_dir) / "trial_spec.json"
        result = run_trial(trial_spec_path)
@@ -248,6 +304,23 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
"state_best_request_rate": state.best_request_rate,
}
)
if is_auto_baseline:
diagnosis = _baseline_all_infeasible_diagnosis(result)
if diagnosis is not None:
state.tuning_stop_reason = "baseline_all_infeasible"
state.tuning_stop_diagnosis = diagnosis
store.save_state(state)
executed.append(
{
"trial_id": None,
"stopped": True,
"reason": state.tuning_stop_reason,
"diagnosis": diagnosis,
"state_best_trial_id": state.best_trial_id,
"state_best_request_rate": state.best_request_rate,
}
)
break
final_state = store.load_state(study.study_id)
print(
@@ -257,6 +330,8 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
"executed_trials": executed,
"best_trial_id": final_state.best_trial_id,
"best_request_rate": final_state.best_request_rate,
"tuning_stop_reason": final_state.tuning_stop_reason,
"tuning_stop_diagnosis": final_state.tuning_stop_diagnosis,
},
ensure_ascii=False,
)
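The two stop paths above append the same shape of record to `executed`, so the summary JSON printed at the end surfaces the halt directly. An illustrative record, with hypothetical values:

# Illustrative entry in the summary's "executed_trials" array after a
# baseline-infeasible stop (values hypothetical; diagnosis truncated).
stop_record = {
    "trial_id": None,
    "stopped": True,
    "reason": "baseline_all_infeasible",
    "diagnosis": "Baseline configuration has no feasible probe under the current SLO. ...",
    "state_best_trial_id": None,
    "state_best_request_rate": None,
}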

View File

@@ -764,6 +764,8 @@ class StudyState:
    best_request_rate: float | None = None
    best_request_rate_per_gpu: float | None = None
    next_trial_index: int = 1
    tuning_stop_reason: str = ""
    tuning_stop_diagnosis: str = ""
    best_by_parallel_size: dict[str, dict[str, Any]] = field(default_factory=dict)
    trials: list[TrialSummary] = field(default_factory=list)
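For illustration, assuming the store persists StudyState as JSON keyed by field name (which the loader in the next file reads back), a halted study's state would carry a fragment roughly like:

# Hypothetical persisted-state fragment after a baseline-infeasible stop
# (unrelated fields elided; diagnosis truncated).
halted_state_payload = {
    "next_trial_index": 2,
    "tuning_stop_reason": "baseline_all_infeasible",
    "tuning_stop_diagnosis": "Baseline configuration has no feasible probe under the current SLO. ...",
}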

View File

@@ -45,6 +45,8 @@ class StudyStore:
            best_request_rate=payload.get("best_request_rate"),
            best_request_rate_per_gpu=payload.get("best_request_rate_per_gpu"),
            next_trial_index=int(payload.get("next_trial_index", 1)),
            tuning_stop_reason=str(payload.get("tuning_stop_reason") or ""),
            tuning_stop_diagnosis=str(payload.get("tuning_stop_diagnosis") or ""),
            best_by_parallel_size={
                str(key): value
                for key, value in (payload.get("best_by_parallel_size") or {}).items()
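The `str(... or "")` coercion is what keeps older state files loading: a missing key and an explicit null both collapse to an empty string rather than the literal "None", so the falsy check in `cmd_study_tune` stays quiet. A quick self-contained sketch:

# Both a missing key and an explicit None collapse to "".
old_payload: dict[str, object] = {"next_trial_index": 3}
assert str(old_payload.get("tuning_stop_reason") or "") == ""
assert str({"tuning_stop_reason": None}.get("tuning_stop_reason") or "") == ""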

View File

@@ -2997,6 +2997,97 @@ class CoreFlowTests(unittest.TestCase):
        self.assertEqual(state.trials[0].config_patch, {"env_patch": {}, "flag_patch": {}})
        self.assertEqual(state.trials[1].config_patch["flag_patch"], {"max-num-seqs": 64})

    def test_cli_tune_stops_when_baseline_is_all_infeasible(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            study_path = _write_study_assets(tmp_path)
            payload = json.loads(study_path.read_text(encoding="utf-8"))
            payload["llm"]["endpoint"] = {
                "provider": "custom",
                "base_url": "http://llm.example/v1",
                "wire_api": "chat.completions",
                "model": "test-model",
                "api_key_env": "OPENAI_API_KEY",
            }
            study_path.write_text(json.dumps(payload), encoding="utf-8")
            store_root = tmp_path / "store"

            def fake_run_trial(trial_spec_path: Path) -> dict[str, object]:
                payload = json.loads(trial_spec_path.read_text(encoding="utf-8"))
                trial_root = Path(payload["artifact_dir"])
                result = {
                    "study_id": payload["study_id"],
                    "trial_id": payload["trial_id"],
                    "status": "completed",
                    "best_sampling_u": None,
                    "best_request_rate": None,
                    "best_pass_rate": None,
                    "best_request_count": None,
                    "probes": [
                        {
                            "threshold": 0.5,
                            "feasible": False,
                            "payload": {"pass_rate": 0.0, "request_rate": 2.0},
                        },
                        {
                            "threshold": 0.25,
                            "feasible": False,
                            "payload": {"pass_rate": 0.5, "request_rate": 1.0},
                        },
                    ],
                    "all_infeasible_diagnostics": {
                        "threshold": 0.25,
                        "request_rate": 1.0,
                        "pass_rate": 0.5,
                        "early_stop_reason": "slo_pass_rate_unrecoverable",
                    },
                }
                (trial_root / "result.json").write_text(json.dumps(result), encoding="utf-8")
                return result

            with mock.patch("aituner.cli.run_trial", side_effect=fake_run_trial):
                with mock.patch("aituner.cli.call_llm_for_proposal") as llm_mock:
                    exit_code = cli_main(
                        [
                            "study",
                            "tune",
                            "--spec",
                            str(study_path),
                            "--store-root",
                            str(store_root),
                            "--max-trials",
                            "3",
                        ]
                    )
            self.assertEqual(exit_code, 0)
            llm_mock.assert_not_called()
            store = StudyStore(store_root)
            state = store.load_state("study-1")
            self.assertEqual(state.next_trial_index, 2)
            self.assertEqual(len(state.trials), 1)
            self.assertEqual(state.tuning_stop_reason, "baseline_all_infeasible")
            self.assertIn("lowest_sampled_request_rate=1", state.tuning_stop_diagnosis)

            with mock.patch("aituner.cli.run_trial") as run_trial_mock:
                with mock.patch("aituner.cli.call_llm_for_proposal") as llm_mock:
                    exit_code = cli_main(
                        [
                            "study",
                            "tune",
                            "--spec",
                            str(study_path),
                            "--store-root",
                            str(store_root),
                            "--max-trials",
                            "3",
                        ]
                    )
            self.assertEqual(exit_code, 0)
            run_trial_mock.assert_not_called()
            llm_mock.assert_not_called()

    def test_cli_tune_max_trials_is_total_budget_on_resume(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)