Use full state for frontier projection

2026-06-29 16:22:09 +08:00
parent 8dd9ada194
commit 9ef9550214
2 changed files with 89 additions and 6 deletions
--- a/src/aituner/harness.py
+++ b/src/aituner/harness.py
@@ -29,6 +29,7 @@ _RUNTIME_KEYS = {
 _STRONG_INCUMBENT_MIN_GAIN = 1.8
 _MIN_POST_INCUMBENT_VALIDATION_TRIALS = 2
 _VALIDATION_TRIALS_WITHOUT_FAMILY_COVERAGE = 3
+_STATEFUL_HISTORY_LIMIT = 8
 # Decode-bound throughput is frequently KV-cache limited, so more gpu-memory-utilization
 # yields more KV blocks and more concurrent decode. Hill-climb in small steps toward a
 # safe ceiling and let measurement find the real peak: a too-high target regresses or
@@ -428,9 +429,14 @@ def _knob_harnesses(
    return harnesses


-def _recent_trial_diagnostics(state: StudyState) -> list[dict[str, Any]]:
+def _recent_trial_diagnostics(
+    state: StudyState,
+    *,
+    limit: int | None = _STATEFUL_HISTORY_LIMIT,
+) -> list[dict[str, Any]]:
    diagnostics: list[dict[str, Any]] = []
-    for trial in state.trials[-stateful_history_limit() :]:
+    trials = state.trials if limit is None else state.trials[-limit:]
+    for trial in trials:
        item: dict[str, Any] = {
            "trial_id": trial.trial_id,
            "status": trial.status,
@@ -629,7 +635,7 @@ def _rank_bottleneck_hypotheses(


 def stateful_history_limit() -> int:
-    return 8
+    return _STATEFUL_HISTORY_LIMIT


 def _state_completed_trials_with_rates(state: StudyState) -> list[TrialSummary]:
@@ -1121,6 +1127,7 @@ def _candidate_actions(
    candidates.extend(
        _frontier_delta_projection_actions(
            study,
+            state,
            trial_profiles,
            top_bottleneck,
            bottleneck_hypotheses,
@@ -1641,6 +1648,7 @@ def _runtime_candidate_actions(

 def _frontier_delta_projection_actions(
    study: StudySpec,
+    state: StudyState,
    trial_profiles: list[dict[str, Any]],
    top_bottleneck: str,
    bottleneck_hypotheses: list[dict[str, Any]],
@@ -1649,10 +1657,11 @@ def _frontier_delta_projection_actions(
 ) -> list[dict[str, Any]]:
    if not (set(study.engine.tunable_flags) & _RUNTIME_KEYS):
        return []
-    anchors = _pareto_frontier_anchor_profiles(study, trial_profiles)
+    projection_profiles = _frontier_projection_profiles(study, state, trial_profiles)
+    anchors = _pareto_frontier_anchor_profiles(study, projection_profiles)
    if len(anchors) < 2:
        return []
-    deltas = _positive_runtime_delta_records(study, trial_profiles)
+    deltas = _positive_runtime_delta_records(study, projection_profiles)
    if not deltas:
        return []

@@ -1661,7 +1670,7 @@ def _frontier_delta_projection_actions(
    incumbent_rate = max(
        (
            _profile_request_rate_per_gpu(profile)
-            for profile in trial_profiles
+            for profile in projection_profiles
            if profile.get("status") == "completed"
        ),
        default=0.0,
@@ -1813,6 +1822,19 @@ def _frontier_delta_projection_actions(
    return actions[:8]


+def _frontier_projection_profiles(
+    study: StudySpec,
+    state: StudyState,
+    trial_profiles: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    if len(state.trials) <= len(trial_profiles):
+        return trial_profiles
+    return _trial_profiles(
+        study,
+        _recent_trial_diagnostics(state, limit=None),
+    )
+
+
 def _pareto_frontier_anchor_profiles(
    study: StudySpec,
    trial_profiles: list[dict[str, Any]],
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -2856,6 +2856,67 @@ class CoreFlowTests(unittest.TestCase):
                            },
                        },
                    ),
+                    TrialSummary(
+                        trial_id="trial-0006",
+                        status="completed",
+                        parallel_size=4,
+                        best_request_rate=8.0,
+                        best_request_rate_per_gpu=2.0,
+                        config_patch={
+                            "env_patch": {},
+                            "flag_patch": {
+                                "tensor-parallel-size": 4,
+                                "gpu-memory-utilization": 0.9,
+                                "max-num-seqs": 16,
+                            },
+                        },
+                    ),
+                    TrialSummary(
+                        trial_id="trial-0007",
+                        status="completed",
+                        parallel_size=4,
+                        best_request_rate=8.0,
+                        best_request_rate_per_gpu=2.0,
+                        config_patch={
+                            "env_patch": {},
+                            "flag_patch": {
+                                "tensor-parallel-size": 4,
+                                "gpu-memory-utilization": 0.92,
+                            },
+                        },
+                    ),
+                    TrialSummary(
+                        trial_id="trial-0008",
+                        status="completed",
+                        parallel_size=4,
+                        best_request_rate=8.0,
+                        best_request_rate_per_gpu=2.0,
+                        config_patch={
+                            "env_patch": {},
+                            "flag_patch": {
+                                "tensor-parallel-size": 4,
+                                "gpu-memory-utilization": 0.9,
+                                "max-num-batched-tokens": 16384,
+                                "max-num-seqs": 16,
+                            },
+                        },
+                    ),
+                    TrialSummary(
+                        trial_id="trial-0009",
+                        status="completed",
+                        parallel_size=4,
+                        best_request_rate=8.0,
+                        best_request_rate_per_gpu=2.0,
+                        config_patch={
+                            "env_patch": {},
+                            "flag_patch": {
+                                "tensor-parallel-size": 4,
+                                "gpu-memory-utilization": 0.9,
+                                "enable-chunked-prefill": True,
+                                "max-num-batched-tokens": 8192,
+                            },
+                        },
+                    ),
                ],
            )
            context = build_harness_context(