diff --git a/src/aituner/llm.py b/src/aituner/llm.py index 3d6dece..1dce926 100644 --- a/src/aituner/llm.py +++ b/src/aituner/llm.py @@ -342,8 +342,11 @@ def build_prompt( [ "The primary cross-topology comparison metric is request_rate_per_gpu, not raw request_rate.", "The proposal should beat the incumbent on request_rate_per_gpu under the 95%+ SLO target.", - "The evaluator uses the best feasible sampling_u from the same tp_dp_product group when it exists.", - "If a tp_dp_product group has no history yet, the evaluator starts from the study's original search.low and runs a full binary search for that group.", + ( + "The evaluator may use the same tp_dp_product incumbent as the search floor when search.inherit_incumbent_floor=true." + if study.search.inherit_incumbent_floor + else "The evaluator runs each proposal over the full configured search range so raw per-iteration performance is measured directly." + ), "Do not assume a configuration with fewer GPUs should inherit the global incumbent sampling_u.", ( "Follow the active harness. Prefer stop over a weak exploratory proposal once a good incumbent has converged." diff --git a/src/aituner/spec.py b/src/aituner/spec.py index c743977..77d99e4 100644 --- a/src/aituner/spec.py +++ b/src/aituner/spec.py @@ -511,6 +511,7 @@ class SamplingSearchSpec: tolerance: float max_probes: int sample_seed: int + inherit_incumbent_floor: bool = False @classmethod def from_dict(cls, data: Mapping[str, Any]) -> "SamplingSearchSpec": @@ -524,6 +525,10 @@ class SamplingSearchSpec: sample_seed=_require_int( data.get("sample_seed", 20260325), context="search.sample_seed" ), + inherit_incumbent_floor=_require_bool( + data.get("inherit_incumbent_floor", False), + context="search.inherit_incumbent_floor", + ), ) diff --git a/src/aituner/store.py b/src/aituner/store.py index 0cc33d6..fb923e1 100644 --- a/src/aituner/store.py +++ b/src/aituner/store.py @@ -85,11 +85,21 @@ class StudyStore: trial_root = self.study_root(study.study_id) / "trials" / trial_id trial_root.mkdir(parents=True, exist_ok=True) parallel_size = _parallel_size_for_proposal(study=study, proposal=proposal) + search = study.search + if study.search.inherit_incumbent_floor: + search = replace( + study.search, + low=_derive_search_floor( + study=study, + state=state, + parallel_size=parallel_size, + ), + ) spec = TrialSpec( study_id=study.study_id, trial_id=trial_id, config_patch=proposal.config_patch, - search=study.search, + search=search, study_spec_path=str((self.study_root(study.study_id) / "study_spec.source").resolve()), artifact_dir=str(trial_root), probe_log_path=str(trial_root / "probe_history.json"), @@ -301,3 +311,15 @@ def _request_rate_per_gpu(best_request_rate: Any, parallel_size: int | None) -> return None return float(best_request_rate) / float(parallel_size) + +def _derive_search_floor(*, study: StudySpec, state: StudyState, parallel_size: int) -> float: + low = study.search.low + high = study.search.high + group_incumbent = (state.best_by_parallel_size or {}).get(str(parallel_size)) + if isinstance(group_incumbent, dict) and isinstance( + group_incumbent.get("best_sampling_u"), (int, float) + ): + candidate = float(group_incumbent["best_sampling_u"]) + else: + candidate = low + return min(high, max(low, candidate)) diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index 8e20b1f..8c704d9 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -2248,6 +2248,47 @@ class CoreFlowTests(unittest.TestCase): trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal) self.assertEqual(trial.search.low, study.search.low) + def test_materialize_trial_can_use_incumbent_floor_when_enabled(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + study_path = _write_study_assets(tmp_path) + payload = json.loads(study_path.read_text(encoding="utf-8")) + payload["search"]["inherit_incumbent_floor"] = True + study_path.write_text(json.dumps(payload), encoding="utf-8") + study = load_study_spec(study_path) + store = StudyStore(tmp_path / ".aituner" / "studies") + store.init_study(spec_path=study_path, study=study) + state = StudyState( + study_id=study.study_id, + best_trial_id="trial-0001", + best_parallel_size=4, + best_sampling_u=0.375, + best_request_rate=3.0, + best_request_rate_per_gpu=0.75, + next_trial_index=2, + best_by_parallel_size={ + "4": { + "trial_id": "trial-0001", + "parallel_size": 4, + "best_sampling_u": 0.375, + "best_request_rate": 3.0, + "best_request_rate_per_gpu": 0.75, + } + }, + trials=[], + ) + proposal = Proposal.from_dict( + { + "observation": "Obs", + "diagnosis": "Diag", + "config_patch": {"env_patch": {}, "flag_patch": {"tensor-parallel-size": 4}}, + "expected_effects": ["raise rate"], + } + ) + trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal) + self.assertEqual(trial.search.low, 0.375) + self.assertTrue(trial.search.inherit_incumbent_floor) + def test_materialize_trial_resets_search_floor_for_new_parallel_group(self) -> None: with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp)