From 2937539b49161db03fb0c8bf16e327da8563c565 Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Fri, 26 Jun 2026 22:17:47 +0800
Subject: [PATCH] Persist harness candidate set snapshots

---
 docs/aituner-roadmap.md |  4 ++--
 src/aituner/cli.py      | 46 +++++++++++++++++++++++++++++++++++++++++
 src/aituner/store.py    | 12 ++++++++++-
 tests/test_core_flow.py | 13 ++++++++++++
 4 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/docs/aituner-roadmap.md b/docs/aituner-roadmap.md
index c625939..b983e01 100644
--- a/docs/aituner-roadmap.md
+++ b/docs/aituner-roadmap.md
@@ -97,8 +97,8 @@ declarative intervention grammar + coverage-relative validator。
 - CandidateSet 完整枚举并持久化 snapshot；
 - CandidateSet v1 先限定为当前 harness generator 实际构造出的 concrete candidates，
   不 claim 全 Cartesian knob space 枚举；`candidate_set_hash`、eligible/blocked
-  records 和 blocked reason summary 已在 harness context 中实现，独立 sidecar JSON
-  persistence 是下一片；
+  records 和 blocked reason summary 已在 harness context 与 `harness/candidate-set-*.json`
+  sidecar 中实现；
 - `harness_priority` 与 backend ranking 分离；
 - CoverageUnit 结构化，stop 不能只依赖 exact signature；
 - `search_high_saturated_by_incumbent` 不能绕过 CandidateSet coverage；对 `req/s/GPU`
diff --git a/src/aituner/cli.py b/src/aituner/cli.py
index 8874441..3878035 100644
--- a/src/aituner/cli.py
+++ b/src/aituner/cli.py
@@ -68,6 +68,42 @@ def _reject_repeated_effective_config(
     )
 
 
+def _harness_snapshot_payload(
+    *,
+    study: StudySpec,
+    state: StudyState,
+    harness_context: dict[str, object],
+) -> dict[str, object]:
+    """Build the JSON payload persisted as a harness candidate-set snapshot."""
+    # Absent or non-dict sections fall back to empty dicts rather than raising.
+    plan = harness_context.get("experiment_plan")
+    plan = plan if isinstance(plan, dict) else {}
+    candidates = plan.get("candidate_set")
+    candidates = candidates if isinstance(candidates, dict) else {}
+    # Summary of the study state as it stands when the snapshot is built.
+    state_ref = {
+        "best_trial_id": state.best_trial_id,
+        "best_parallel_size": state.best_parallel_size,
+        "best_request_rate": state.best_request_rate,
+        "best_request_rate_per_gpu": state.best_request_rate_per_gpu,
+        "next_trial_index": state.next_trial_index,
+        "trial_count": len(state.trials),
+    }
+    decision_keys = ("harness_proposal", "harness_stop", "stop_authority")
+    decisions = {"next_action": plan.get("next_action")}
+    decisions.update((key, harness_context.get(key)) for key in decision_keys)
+    return {
+        "schema_version": 1,
+        "study_id": study.study_id,
+        "iteration": state.next_trial_index,
+        "planner_version": plan.get("planner_version"),
+        "candidate_set_hash": candidates.get("candidate_set_hash"),
+        "state_ref": state_ref,
+        "candidate_set": candidates,
+        "decisions": decisions,
+    }
+
+
 def _latency_percentiles(summary: object, metric: str) -> dict[str, float]:
     if not isinstance(summary, dict):
         return {}
@@ -289,6 +325,16 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
             if study.llm.use_harness
             else None
         )
+        if harness_context is not None:
+            store.write_harness_snapshot(
+                study.study_id,
+                f"candidate-set-{state.next_trial_index:04d}",
+                _harness_snapshot_payload(
+                    study=study,
+                    state=state,
+                    harness_context=harness_context,
+                ),
+            )
         prompt = build_prompt(
             study=study,
             window_summary=window_summary,
diff --git a/src/aituner/store.py b/src/aituner/store.py
index 1c970b6..c0aae71 100644
--- a/src/aituner/store.py
+++ b/src/aituner/store.py
@@ -27,7 +27,7 @@ class StudyStore:
 
     def init_study(self, *, spec_path: Path, study: StudySpec) -> Path:
         root = self.study_root(study.study_id)
-        for rel in ("prompts", "proposals", "trials", "results"):
+        for rel in ("prompts", "proposals", "trials", "results", "harness"):
             (root / rel).mkdir(parents=True, exist_ok=True)
         (root / "study_spec.source").write_text(str(spec_path.resolve()) + "\n", encoding="utf-8")
         self.write_json(root / "study_spec.snapshot.json", to_jsonable(study))
@@ -70,6 +70,16 @@ class StudyStore:
         self.write_json(path, to_jsonable(proposal))
         return path
 
+    def write_harness_snapshot(
+        self, study_id: str, snapshot_name: str, payload: dict[str, Any]
+    ) -> Path:
+        """Persist ``payload`` as ``harness/<snapshot_name>.json``; return the path."""
+        path = self.study_root(study_id) / "harness" / f"{snapshot_name}.json"
+        # Studies initialised before ``init_study`` created ``harness/`` lack it.
+        path.parent.mkdir(parents=True, exist_ok=True)
+        self.write_json(path, payload)
+        return path
+
     def materialize_trial(
         self,
         *,
diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py
index 9b8f064..3b3d797 100644
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -6297,6 +6297,19 @@ class CoreFlowTests(unittest.TestCase):
             self.assertTrue(proposal_path.exists())
             proposal = json.loads(proposal_path.read_text(encoding="utf-8"))
             self.assertTrue(proposal["should_stop"])
+            snapshot_path = (
+                store.study_root(study.study_id)
+                / "harness"
+                / "candidate-set-0005.json"
+            )
+            self.assertTrue(snapshot_path.exists())
+            snapshot = json.loads(snapshot_path.read_text(encoding="utf-8"))
+            self.assertEqual(snapshot["schema_version"], 1)
+            self.assertEqual(snapshot["iteration"], 5)
+            self.assertIn("candidate_set_hash", snapshot)
+            self.assertIn("candidate_set", snapshot)
+            self.assertIn("harness_stop", snapshot["decisions"])
+            self.assertIn("stop_authority", snapshot["decisions"])
             state = store.load_state(study.study_id)
             self.assertEqual(state.tuning_stop_reason, "harness_stop")
             self.assertEqual(