From 2937539b49161db03fb0c8bf16e327da8563c565 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Fri, 26 Jun 2026 22:17:47 +0800 Subject: [PATCH] Persist harness candidate set snapshots --- docs/aituner-roadmap.md | 4 ++-- src/aituner/cli.py | 46 +++++++++++++++++++++++++++++++++++++++++ src/aituner/store.py | 12 ++++++++++- tests/test_core_flow.py | 13 ++++++++++++ 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/docs/aituner-roadmap.md b/docs/aituner-roadmap.md index c625939..b983e01 100644 --- a/docs/aituner-roadmap.md +++ b/docs/aituner-roadmap.md @@ -97,8 +97,8 @@ declarative intervention grammar + coverage-relative validator。 - CandidateSet 完整枚举并持久化 snapshot; - CandidateSet v1 先限定为当前 harness generator 实际构造出的 concrete candidates, 不 claim 全 Cartesian knob space 枚举;`candidate_set_hash`、eligible/blocked - records 和 blocked reason summary 已在 harness context 中实现,独立 sidecar JSON - persistence 是下一片; + records 和 blocked reason summary 已在 harness context 与 `harness/candidate-set-*.json` + sidecar 中实现; - `harness_priority` 与 backend ranking 分离; - CoverageUnit 结构化,stop 不能只依赖 exact signature; - `search_high_saturated_by_incumbent` 不能绕过 CandidateSet coverage;对 `req/s/GPU` diff --git a/src/aituner/cli.py b/src/aituner/cli.py index 8874441..3878035 100644 --- a/src/aituner/cli.py +++ b/src/aituner/cli.py @@ -68,6 +68,42 @@ def _reject_repeated_effective_config( ) +def _harness_snapshot_payload( + *, + study: StudySpec, + state: StudyState, + harness_context: dict[str, object], +) -> dict[str, object]: + experiment_plan = harness_context.get("experiment_plan") + if not isinstance(experiment_plan, dict): + experiment_plan = {} + candidate_set = experiment_plan.get("candidate_set") + if not isinstance(candidate_set, dict): + candidate_set = {} + return { + "schema_version": 1, + "study_id": study.study_id, + "iteration": state.next_trial_index, + "planner_version": experiment_plan.get("planner_version"), + "candidate_set_hash": candidate_set.get("candidate_set_hash"), + "state_ref": { + "best_trial_id": state.best_trial_id, + "best_parallel_size": state.best_parallel_size, + "best_request_rate": state.best_request_rate, + "best_request_rate_per_gpu": state.best_request_rate_per_gpu, + "next_trial_index": state.next_trial_index, + "trial_count": len(state.trials), + }, + "candidate_set": candidate_set, + "decisions": { + "next_action": experiment_plan.get("next_action"), + "harness_proposal": harness_context.get("harness_proposal"), + "harness_stop": harness_context.get("harness_stop"), + "stop_authority": harness_context.get("stop_authority"), + }, + } + + def _latency_percentiles(summary: object, metric: str) -> dict[str, float]: if not isinstance(summary, dict): return {} @@ -289,6 +325,16 @@ def cmd_study_tune(args: argparse.Namespace) -> int: if study.llm.use_harness else None ) + if harness_context is not None: + store.write_harness_snapshot( + study.study_id, + f"candidate-set-{state.next_trial_index:04d}", + _harness_snapshot_payload( + study=study, + state=state, + harness_context=harness_context, + ), + ) prompt = build_prompt( study=study, window_summary=window_summary, diff --git a/src/aituner/store.py b/src/aituner/store.py index 1c970b6..c0aae71 100644 --- a/src/aituner/store.py +++ b/src/aituner/store.py @@ -27,7 +27,7 @@ class StudyStore: def init_study(self, *, spec_path: Path, study: StudySpec) -> Path: root = self.study_root(study.study_id) - for rel in ("prompts", "proposals", "trials", "results"): + for rel in ("prompts", "proposals", "trials", "results", "harness"): (root / rel).mkdir(parents=True, exist_ok=True) (root / "study_spec.source").write_text(str(spec_path.resolve()) + "\n", encoding="utf-8") self.write_json(root / "study_spec.snapshot.json", to_jsonable(study)) @@ -70,6 +70,16 @@ class StudyStore: self.write_json(path, to_jsonable(proposal)) return path + def write_harness_snapshot( + self, + study_id: str, + snapshot_name: str, + payload: dict[str, Any], + ) -> Path: + path = self.study_root(study_id) / "harness" / f"{snapshot_name}.json" + self.write_json(path, payload) + return path + def materialize_trial( self, *, diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index 9b8f064..3b3d797 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -6297,6 +6297,19 @@ class CoreFlowTests(unittest.TestCase): self.assertTrue(proposal_path.exists()) proposal = json.loads(proposal_path.read_text(encoding="utf-8")) self.assertTrue(proposal["should_stop"]) + snapshot_path = ( + store.study_root(study.study_id) + / "harness" + / "candidate-set-0005.json" + ) + self.assertTrue(snapshot_path.exists()) + snapshot = json.loads(snapshot_path.read_text(encoding="utf-8")) + self.assertEqual(snapshot["schema_version"], 1) + self.assertEqual(snapshot["iteration"], 5) + self.assertIn("candidate_set_hash", snapshot) + self.assertIn("candidate_set", snapshot) + self.assertIn("harness_stop", snapshot["decisions"]) + self.assertIn("stop_authority", snapshot["decisions"]) state = store.load_state(study.study_id) self.assertEqual(state.tuning_stop_reason, "harness_stop") self.assertEqual(