Add study tune loop and smoke configs
@@ -5,7 +5,9 @@ import subprocess
import tempfile
import unittest
from pathlib import Path
from unittest import mock

from aituner.cli import main as cli_main
from aituner.job import append_job, build_trial_job
from aituner.llm import build_prompt, parse_proposal_text
from aituner.search import ThresholdProbe, binary_search_max_feasible
@@ -371,6 +373,82 @@ class CoreFlowTests(unittest.TestCase):
        self.assertGreaterEqual(result.best_threshold, 0.5)
        self.assertIsNotNone(result.best_feasible_payload)

    def test_trace_max_requests_uses_window_wide_downsample(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            trace_dir = tmp_path / "trace_windows" / "traces"
            trace_dir.mkdir(parents=True)
            trace_path = trace_dir / "chat_many.jsonl"
            with trace_path.open("w", encoding="utf-8") as handle:
                for idx in range(10):
                    handle.write(
                        json.dumps(
                            {
                                "request_id": f"r{idx}",
                                "timestamp": float(idx),
                                "sampling_u": idx / 10.0,
                                "messages": [{"role": "user", "content": f"hello-{idx}"}],
                                "input_length": 10 + idx,
                                "output_length": 5,
                            }
                        )
                        + "\n"
                    )
            windows_path = tmp_path / "trace_windows" / "windows.json"
            windows_path.write_text(
                json.dumps(
                    {
                        "windows": [
                            {
                                "window_id": "w1",
                                "trace_type": "chat",
                                "trace_file": "traces/chat_many.jsonl",
                                "window_start": 0.0,
                                "window_end": 10.0,
                            }
                        ]
                    }
                ),
                encoding="utf-8",
            )
            study_path = tmp_path / "study.json"
            study_path.write_text(
                json.dumps(
                    {
                        "study_id": "study-downsample",
                        "hardware": {"gpu_count": 1},
                        "model": {"model_id": "m1", "served_model_name": "dummy-model"},
                        "engine": {
                            "engine_name": "vllm",
                            "exec_path": "/usr/local/bin/vllm",
                            "host": "127.0.0.1",
                            "port": 8000,
                            "ready_timeout_s": 10,
                            "request_timeout_s": 10,
                            "healthcheck_path": "/v1/models",
                            "launch_args": [],
                            "base_envs": {},
                            "base_flags": {},
                            "tunable_envs": [],
                            "tunable_flags": [],
                        },
                        "trace": {
                            "windows_path": str(windows_path),
                            "window_id": "w1",
                            "max_concurrency": 1,
                            "max_requests_per_probe": 4,
                        },
                        "slo": {"target_pass_rate": 0.95},
                        "search": {"low": 0.0, "high": 1.0, "tolerance": 0.1, "max_probes": 2, "sample_seed": 1},
                        "llm": {"system_prompt": "", "max_history_trials": 1},
                    }
                ),
                encoding="utf-8",
            )
            study = load_study_spec(study_path)
            _, requests = load_trace_requests(study, study_spec_path=study_path)
            self.assertEqual([item.row_id for item in requests], ["r0", "r2", "r5", "r7"])

    def test_proposal_validation_and_job_emission(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
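The expected row ids above (r0, r2, r5, r7) are consistent with an even, window-wide downsample of the 10 trace rows to max_requests_per_probe = 4. A minimal sketch of such a selection rule follows; it is an assumption for illustration only, not the actual body of load_trace_requests:

    def downsample_indices(total: int, limit: int) -> list[int]:
        # Pick `limit` evenly spaced indices across the whole window.
        if total <= limit:
            return list(range(total))
        return [i * total // limit for i in range(limit)]

    # downsample_indices(10, 4) == [0, 2, 5, 7]  ->  request ids r0, r2, r5, r7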
@@ -437,6 +515,83 @@ class CoreFlowTests(unittest.TestCase):
            self.assertEqual(next_state.best_trial_id, trial.trial_id)
            self.assertEqual(next_state.best_request_rate, 12.5)

    def test_cli_tune_runs_multiple_manual_proposals(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            study_path = _write_study_assets(tmp_path)
            proposal1 = tmp_path / "proposal-1.json"
            proposal2 = tmp_path / "proposal-2.json"
            proposal1.write_text(
                json.dumps(
                    {
                        "observation": "trial one",
                        "diagnosis": "conservative",
                        "config_patch": {"env_patch": {}, "flag_patch": {"tensor-parallel-size": 4}},
                        "expected_effects": ["stable"],
                        "why_not_previous_failures": "",
                    }
                ),
                encoding="utf-8",
            )
            proposal2.write_text(
                json.dumps(
                    {
                        "observation": "trial two",
                        "diagnosis": "more batching",
                        "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 64}},
                        "expected_effects": ["higher throughput"],
                        "why_not_previous_failures": "",
                    }
                ),
                encoding="utf-8",
            )
            store_root = tmp_path / "store"

            def fake_run_trial(trial_spec_path: Path) -> dict[str, object]:
                payload = json.loads(trial_spec_path.read_text(encoding="utf-8"))
                trial_id = str(payload["trial_id"])
                trial_root = Path(payload["artifact_dir"])
                if trial_id.endswith("0001"):
                    best_rate = 1.0
                    best_u = 0.5
                else:
                    best_rate = 2.0
                    best_u = 0.75
                result = {
                    "study_id": payload["study_id"],
                    "trial_id": trial_id,
                    "status": "completed",
                    "best_sampling_u": best_u,
                    "best_request_rate": best_rate,
                    "best_pass_rate": 1.0,
                    "best_request_count": 2,
                    "probes": [],
                }
                (trial_root / "result.json").write_text(json.dumps(result), encoding="utf-8")
                return result

            with mock.patch("aituner.cli.run_trial", side_effect=fake_run_trial):
                exit_code = cli_main(
                    [
                        "study",
                        "tune",
                        "--spec",
                        str(study_path),
                        "--store-root",
                        str(store_root),
                        "--proposal-file",
                        str(proposal1),
                        "--proposal-file",
                        str(proposal2),
                    ]
                )
            self.assertEqual(exit_code, 0)
            store = StudyStore(store_root)
            state = store.load_state("study-1")
            self.assertEqual(state.best_trial_id, "trial-0002")
            self.assertEqual(state.best_request_rate, 2.0)
            self.assertEqual(state.next_trial_index, 3)


if __name__ == "__main__":
    unittest.main()
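For reference, the argument list passed to cli_main above corresponds to a shell invocation along the lines of the following; the exact entry-point name is an assumption, since the diff only shows aituner.cli.main being called directly:

    python -m aituner.cli study tune \
        --spec study.json \
        --store-root ./store \
        --proposal-file proposal-1.json \
        --proposal-file proposal-2.json

Each --proposal-file supplies one manual proposal, so the tune loop runs one trial per file and the stored study state keeps the best-performing trial (trial-0002 in this test).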