Add harness-guided tuning prompts
This commit is contained in:
@@ -218,6 +218,9 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("study-1", prompt)
|
||||
self.assertIn('"current_best"', prompt)
|
||||
self.assertIn("queueing_knee_by_bucket", prompt)
|
||||
self.assertIn("Harnesses:", prompt)
|
||||
self.assertIn("workload_lca_profile", prompt)
|
||||
self.assertIn("knob_harnesses", prompt)
|
||||
self.assertTrue(study_root.exists())
|
||||
|
||||
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
|
||||
@@ -239,6 +242,8 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertEqual([item.prompt_tokens_hint for item in requests], [1000, 5000])
|
||||
self.assertEqual(summary["request_count"], 2)
|
||||
self.assertEqual(summary["prompt_tokens_p95"], 5000.0)
|
||||
self.assertIn("prefix_cache", summary)
|
||||
self.assertIn("arrival_burst_ratio_p95_to_mean", summary)
|
||||
prompt = build_prompt(
|
||||
study=study,
|
||||
window_summary=summary,
|
||||
@@ -1728,6 +1733,44 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertEqual(state.best_request_rate, 2.0)
|
||||
self.assertEqual(state.next_trial_index, 3)
|
||||
|
||||
def test_cli_tune_honors_should_stop_proposal(self) -> None:
    """A proposal with should_stop=True must end tuning before any trial runs.

    Verifies that `study tune` exits cleanly, never invokes run_trial, and
    leaves the persisted study state at its initial trial index.
    """
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        spec_path = _write_study_assets(root)

        # Persist a stop proposal for the CLI to consume.
        stop_payload = {
            "observation": "incumbent converged",
            "diagnosis": "no adjacent harness probe is justified",
            "config_patch": {"env_patch": {}, "flag_patch": {}},
            "expected_effects": ["stop without spending another GPU trial"],
            "why_not_previous_failures": "not applicable",
            "should_stop": True,
        }
        stop_file = root / "stop.json"
        stop_file.write_text(json.dumps(stop_payload), encoding="utf-8")

        store_dir = root / "store"
        # Guard against any GPU trial being launched while tuning.
        with mock.patch("aituner.cli.run_trial") as trial_mock:
            argv = [
                "study",
                "tune",
                "--spec",
                str(spec_path),
                "--store-root",
                str(store_dir),
                "--proposal-file",
                str(stop_file),
            ]
            rc = cli_main(argv)

        self.assertEqual(rc, 0)
        trial_mock.assert_not_called()

        # State on disk should still be at the pre-trial index.
        state = StudyStore(store_dir).load_state("study-1")
        self.assertEqual(state.next_trial_index, 1)
|
||||
|
||||
def test_load_compare_spec_requires_window_selection(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
@@ -1993,6 +2036,18 @@ class CoreFlowTests(unittest.TestCase):
|
||||
["throughput: higher", "ttft: lower"],
|
||||
)
|
||||
|
||||
def test_proposal_accepts_should_stop(self) -> None:
    """Proposal.from_dict must round-trip an explicit should_stop flag."""
    payload = {
        "observation": "obs",
        "diagnosis": "converged",
        "config_patch": {"env_patch": {}, "flag_patch": {}},
        "expected_effects": ["avoid wasting another GPU trial"],
        "should_stop": True,
    }
    parsed = Proposal.from_dict(payload)
    # The flag should survive deserialization unchanged.
    self.assertTrue(parsed.should_stop)
|
||||
|
||||
def test_parse_proposal_text_accepts_wrapped_json(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user