From 83325b2f7602b1df4c5e448a7946a94979eb4455 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 11 Apr 2026 00:36:45 +0800 Subject: [PATCH] Reset new topology groups to full binary search --- src/aituner/llm.py | 5 +++-- src/aituner/store.py | 6 ------ tests/test_core_flow.py | 13 +++++++++++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/aituner/llm.py b/src/aituner/llm.py index f6f8f4b..79ac9ea 100644 --- a/src/aituner/llm.py +++ b/src/aituner/llm.py @@ -301,8 +301,9 @@ def build_prompt( "", "The primary cross-topology comparison metric is request_rate_per_gpu, not raw request_rate.", "The proposal should beat the incumbent on request_rate_per_gpu under the 95%+ SLO target.", - "The evaluator uses the best feasible sampling_u from the same tp_dp_product group when it exists. For a different tp_dp_product group, it uses a scaled lower floor instead of reusing the global incumbent directly.", - "Do not assume a configuration with fewer GPUs must start from the global incumbent sampling_u.", + "The evaluator uses the best feasible sampling_u from the same tp_dp_product group when it exists.", + "If a tp_dp_product group has no history yet, the evaluator starts from the study's original search.low and runs a full binary search for that group.", + "Do not assume a configuration with fewer GPUs should inherit the global incumbent sampling_u.", ] return "\n".join(sections) diff --git a/src/aituner/store.py b/src/aituner/store.py index bb9e49e..c023e71 100644 --- a/src/aituner/store.py +++ b/src/aituner/store.py @@ -253,12 +253,6 @@ def _derive_search_floor(*, study: StudySpec, state: StudyState, parallel_size: group_incumbent.get("best_sampling_u"), (int, float) ): candidate = float(group_incumbent["best_sampling_u"]) - elif ( - isinstance(state.best_sampling_u, (int, float)) - and isinstance(state.best_parallel_size, int) - and state.best_parallel_size > 0 - ): - candidate = float(state.best_sampling_u) * float(parallel_size) / float(state.best_parallel_size) else: candidate = low return min(high, max(low, candidate)) diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index 1e7b856..9e7e484 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -1061,6 +1061,15 @@ class CoreFlowTests(unittest.TestCase): best_request_rate=3.0, best_request_rate_per_gpu=0.75, next_trial_index=2, + best_by_parallel_size={ + "4": { + "trial_id": "trial-0001", + "parallel_size": 4, + "best_sampling_u": 0.375, + "best_request_rate": 3.0, + "best_request_rate_per_gpu": 0.75, + } + }, trials=[], ) proposal = Proposal.from_dict( @@ -1112,7 +1121,7 @@ class CoreFlowTests(unittest.TestCase): trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal) self.assertEqual(trial.search.low, 0.125) - def test_materialize_trial_scales_search_floor_for_different_parallel_group(self) -> None: + def test_materialize_trial_resets_search_floor_for_new_parallel_group(self) -> None: with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp) study_path = _write_study_assets(tmp_path) @@ -1138,7 +1147,7 @@ class CoreFlowTests(unittest.TestCase): } ) trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal) - self.assertEqual(trial.search.low, 0.2) + self.assertEqual(trial.search.low, study.search.low) def test_ingest_trial_results_records_failure_reason(self) -> None: with tempfile.TemporaryDirectory() as tmp: