From 7ad439730ec97d78be180370f28c47c334cd45b2 Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Sat, 27 Jun 2026 12:21:51 +0800
Subject: [PATCH] Add llm-first tuning proposal policy

---
 src/aituner/cli.py      | 12 +++++-
 tests/test_core_flow.py | 93 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/src/aituner/cli.py b/src/aituner/cli.py
index 3878035..44c31f9 100644
--- a/src/aituner/cli.py
+++ b/src/aituner/cli.py
@@ -288,6 +288,7 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
     capability_profile = load_capability_profile(study, study_spec_path=spec_path)
     proposal_files = [Path(item).resolve() for item in (args.proposal_file or [])]
     max_trials = args.max_trials or (len(proposal_files) if proposal_files else 2)
+    proposal_policy = args.proposal_policy
     if max_trials <= 0:
         raise SpecError("max_trials must be positive")
     if proposal_files and max_trials > len(proposal_files):
@@ -387,7 +388,7 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
             else:
                 guided_proposal = (
                     build_harness_guided_proposal(harness_context)
-                    if harness_context is not None
+                    if harness_context is not None and proposal_policy == "harness-first"
                     else None
                 )
                 if guided_proposal is not None:
@@ -782,6 +783,15 @@ def build_parser() -> argparse.ArgumentParser:
     tune.add_argument("--store-root")
     tune.add_argument("--proposal-file", action="append")
     tune.add_argument("--max-trials", type=int)
+    tune.add_argument(
+        "--proposal-policy",
+        choices=("harness-first", "llm-first"),
+        default="harness-first",
+        help=(
+            "Choose whether deterministic harness proposals are tried before the LLM "
+            "or whether the LLM proposes directly from the harness prompt/context."
+        ),
+    )
     tune.add_argument(
         "--skip-baseline",
         action="store_true",
diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py
index 3b3d797..6200578 100644
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -6323,6 +6323,99 @@ class CoreFlowTests(unittest.TestCase):
             )
             self.assertTrue(state.tuning_stop_diagnosis)
 
+    def test_cli_tune_llm_first_skips_deterministic_harness_proposal(self) -> None:
+        """With `--proposal-policy llm-first` the LLM is asked; no harness proposal is written."""
+        with tempfile.TemporaryDirectory() as scratch:
+            workdir = Path(scratch)
+            spec_file = _write_study_assets(workdir)
+            spec_doc = json.loads(spec_file.read_text(encoding="utf-8"))
+            spec_doc["llm"]["endpoint"] = {
+                "provider": "custom",
+                "base_url": "http://llm.example/v1",
+                "wire_api": "chat.completions",
+                "model": "test-model",
+                "api_key_env": "OPENAI_API_KEY",
+            }
+            spec_file.write_text(json.dumps(spec_doc), encoding="utf-8")
+            study = load_study_spec(spec_file)
+            store_dir = workdir / "store"
+            store = StudyStore(store_dir)
+            store.init_study(spec_path=spec_file, study=study)
+            # Seed the store with one completed trial so the next trial index is 2.
+            prior_trial = TrialSummary(
+                trial_id="trial-0001",
+                status="completed",
+                parallel_size=8,
+                best_request_rate=1.0,
+                best_request_rate_per_gpu=0.125,
+                config_patch={"env_patch": {}, "flag_patch": {}},
+            )
+            seeded_state = StudyState(
+                study_id=study.study_id,
+                best_trial_id="trial-0001",
+                best_parallel_size=8,
+                best_sampling_u=0.25,
+                best_request_rate=1.0,
+                best_request_rate_per_gpu=0.125,
+                next_trial_index=2,
+                trials=[prior_trial],
+            )
+            store.save_state(seeded_state)
+
+            llm_reply = json.dumps(
+                {
+                    "observation": "Use harness evidence but let the LLM choose.",
+                    "diagnosis": "Try higher admission concurrency.",
+                    "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 64}},
+                    "expected_effects": ["measure admission concurrency"],
+                    "why_not_previous_failures": "does not repeat a prior full config",
+                    "should_stop": False,
+                }
+            )
+
+            def stub_run_trial(trial_spec_path: Path) -> dict[str, object]:
+                """Stand-in for `run_trial`: write a completed result.json and return it."""
+                trial_spec = json.loads(trial_spec_path.read_text(encoding="utf-8"))
+                result_file = Path(trial_spec["artifact_dir"]) / "result.json"
+                outcome = {
+                    "study_id": trial_spec["study_id"],
+                    "trial_id": trial_spec["trial_id"],
+                    "status": "completed",
+                    "best_sampling_u": 0.5,
+                    "best_request_rate": 2.0,
+                    "best_pass_rate": 1.0,
+                    "best_request_count": 2,
+                    "probes": [],
+                }
+                result_file.write_text(json.dumps(outcome), encoding="utf-8")
+                return outcome
+
+            argv = [
+                "study",
+                "tune",
+                "--spec",
+                str(spec_file),
+                "--store-root",
+                str(store_dir),
+                "--skip-baseline",
+                "--max-trials",
+                "2",
+                "--proposal-policy",
+                "llm-first",
+            ]
+            llm_patch = mock.patch("aituner.cli.call_llm_for_proposal", return_value=llm_reply)
+            run_patch = mock.patch("aituner.cli.run_trial", side_effect=stub_run_trial)
+            with llm_patch as llm_mock, run_patch:
+                exit_code = cli_main(argv)
+
+            self.assertEqual(exit_code, 0)
+            llm_mock.assert_called_once()
+            study_root = store.study_root(study.study_id)
+            proposal_dir = study_root / "proposals"
+            self.assertTrue((proposal_dir / "proposal-0002.json").exists())
+            self.assertFalse((proposal_dir / "harness-proposal-0002.json").exists())
+            self.assertTrue((study_root / "harness" / "candidate-set-0002.json").exists())
+
     def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
         with tempfile.TemporaryDirectory() as tmp:
             tmp_path = Path(tmp)