Add auto search high measurement policy
This commit is contained in:
@@ -51,7 +51,7 @@ from aituner.spec import (
|
||||
TrialSummary,
|
||||
load_study_spec,
|
||||
)
|
||||
from aituner.store import StudyStore
|
||||
from aituner.store import StudyStore, resolve_auto_high_search
|
||||
from aituner.trace import load_trace_requests, summarize_window
|
||||
from aituner.worker import (
|
||||
_adaptive_replay_set,
|
||||
@@ -79,6 +79,7 @@ def _write_study_assets(
|
||||
trace_overrides: dict[str, object] | None = None,
|
||||
slo_overrides: dict[str, object] | None = None,
|
||||
engine_overrides: dict[str, object] | None = None,
|
||||
search_overrides: dict[str, object] | None = None,
|
||||
) -> Path:
|
||||
trace_dir = tmp_path / "trace_windows" / "traces"
|
||||
trace_dir.mkdir(parents=True)
|
||||
@@ -196,6 +197,8 @@ def _write_study_assets(
|
||||
study_payload["slo"].update(slo_overrides)
|
||||
if engine_overrides:
|
||||
study_payload["engine"].update(engine_overrides)
|
||||
if search_overrides:
|
||||
study_payload["search"].update(search_overrides)
|
||||
study_path.write_text(json.dumps(study_payload), encoding="utf-8")
|
||||
return study_path
|
||||
|
||||
@@ -260,6 +263,76 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("knob_harnesses", prompt)
|
||||
self.assertTrue(study_root.exists())
|
||||
|
||||
def test_search_auto_high_schema_is_backward_compatible(self) -> None:
    """Check that a search section without ``auto_high`` keeps its ``high``.

    The study is written with only ``high`` overridden (0.4) in its search
    section. After loading, ``auto_high.enabled`` must be false, and
    ``resolve_auto_high_search`` must return a search whose ``high`` is
    still 0.4 together with evidence whose reason is ``auto_high_disabled``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        spec_file = _write_study_assets(
            Path(workdir),
            search_overrides={"high": 0.4},
        )
        loaded = load_study_spec(spec_file)

        # No auto_high entry was supplied, so the loaded policy must be off.
        self.assertFalse(loaded.search.auto_high.enabled)

        resolved, details = resolve_auto_high_search(
            search=loaded.search,
            sampling_us=[0.1, 0.9],
        )

        # The configured upper bound is returned unchanged (0.4, not 0.9).
        self.assertEqual(resolved.high, 0.4)
        self.assertEqual(details["reason"], "auto_high_disabled")
|
||||
|
||||
def test_search_auto_high_caps_at_policy_and_trace(self) -> None:
    """Check how ``resolve_auto_high_search`` picks ``high`` when enabled.

    Three scenarios are exercised with ``max_sampling_u`` set to 0.8:

    * configured high 0.2, samples up to 0.9 -> high becomes 0.8 and the
      reason is ``search_high_raised_to_trace_ceiling``;
    * configured high 0.2, samples up to 0.7 -> high becomes 0.7 and the
      evidence reports an ``effective_ceiling`` of 0.7;
    * configured high 0.95, samples up to 0.9 -> high becomes 0.8 and the
      reason is ``search_high_lowered_to_trace_ceiling``.
    """
    auto_high_policy = {
        "enabled": True,
        "max_sampling_u": 0.8,
        "require_human_confirmation_beyond_trace": True,
    }

    with tempfile.TemporaryDirectory() as workdir:
        spec_file = _write_study_assets(
            Path(workdir),
            search_overrides={
                "high": 0.2,
                # Fresh copy so each consumer gets its own dict, as in the
                # original where the literal was written out twice.
                "auto_high": dict(auto_high_policy),
            },
        )
        loaded = load_study_spec(spec_file)

        # Scenario 1: samples reach 0.9, above the 0.8 policy maximum.
        policy_result, policy_details = resolve_auto_high_search(
            search=loaded.search,
            sampling_us=[0.1, 0.9],
        )
        self.assertEqual(policy_result.high, 0.8)
        self.assertEqual(
            policy_details["reason"],
            "search_high_raised_to_trace_ceiling",
        )

        # Scenario 2: samples only reach 0.7, below the policy maximum.
        trace_result, trace_details = resolve_auto_high_search(
            search=loaded.search,
            sampling_us=[0.1, 0.7],
        )
        self.assertEqual(trace_result.high, 0.7)
        self.assertEqual(trace_details["effective_ceiling"], 0.7)

        # Scenario 3: a search spec that starts with high at 0.95, built
        # through the same class as the loaded spec.
        search_cls = loaded.search.__class__
        elevated_search = search_cls.from_dict(
            {
                "low": 0.0,
                "high": 0.95,
                "tolerance": loaded.search.tolerance,
                "max_probes": loaded.search.max_probes,
                "sample_seed": loaded.search.sample_seed,
                "auto_high": dict(auto_high_policy),
            }
        )
        lowered_result, lowered_details = resolve_auto_high_search(
            search=elevated_search,
            sampling_us=[0.1, 0.9],
        )
        self.assertEqual(lowered_result.high, 0.8)
        self.assertEqual(
            lowered_details["reason"],
            "search_high_lowered_to_trace_ceiling",
        )
|
||||
|
||||
def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
|
||||
window = WindowRecord(
|
||||
window_id="w1",
|
||||
@@ -1381,11 +1454,17 @@ class CoreFlowTests(unittest.TestCase):
|
||||
window_summary={"prompt_tokens_p95": 2048},
|
||||
state=state,
|
||||
)
|
||||
self.assertTrue(context["harness_stop"]["should_stop"])
|
||||
self.assertEqual(context["harness_stop"]["reason"], "search_high_saturated_by_incumbent")
|
||||
self.assertFalse(context["harness_stop"]["should_stop"])
|
||||
self.assertEqual(
|
||||
context["harness_stop"]["reason"],
|
||||
"search_high_saturation_requires_parallel_size_evidence",
|
||||
)
|
||||
self.assertEqual(
|
||||
context["harness_stop"]["evidence"]["objective"],
|
||||
"request_rate_per_gpu",
|
||||
)
|
||||
proposal = build_harness_stop_proposal(context)
|
||||
self.assertIsNotNone(proposal)
|
||||
self.assertTrue(proposal.should_stop)
|
||||
self.assertIsNone(proposal)
|
||||
|
||||
def test_harness_stop_allows_feasible_high_probe_with_some_failures(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
@@ -1446,8 +1525,11 @@ class CoreFlowTests(unittest.TestCase):
|
||||
window_summary={"prompt_tokens_p95": 2048},
|
||||
state=state,
|
||||
)
|
||||
self.assertTrue(context["harness_stop"]["should_stop"])
|
||||
self.assertEqual(context["harness_stop"]["reason"], "search_high_saturated_by_incumbent")
|
||||
self.assertFalse(context["harness_stop"]["should_stop"])
|
||||
self.assertEqual(
|
||||
context["harness_stop"]["reason"],
|
||||
"search_high_saturation_requires_parallel_size_evidence",
|
||||
)
|
||||
|
||||
def test_harness_guided_first_tp_probe_for_latency_bottleneck(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
@@ -4498,7 +4580,9 @@ class CoreFlowTests(unittest.TestCase):
|
||||
with mock.patch("aituner.worker._wait_for_server_or_exit", return_value=None):
|
||||
with mock.patch("aituner.worker._terminate_process_tree", return_value=None):
|
||||
with mock.patch("aituner.worker._replay_requests", side_effect=fake_replay):
|
||||
result = run_trial(Path(trial.artifact_dir) / "trial_spec.json")
|
||||
result = run_trial(
|
||||
Path(trial.artifact_dir) / "trial_spec.json"
|
||||
)
|
||||
|
||||
self.assertEqual(result["status"], "completed")
|
||||
details_path = Path(trial.artifact_dir) / "probe_details.jsonl"
|
||||
@@ -4512,6 +4596,60 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertEqual(rows[0]["outcomes"][0]["request_id"], "r1")
|
||||
self.assertEqual(rows[0]["outcomes"][0]["sampling_u"], 0.1)
|
||||
|
||||
def test_run_trial_marks_full_trace_saturation_as_measurement_ceiling_insufficient(
    self,
) -> None:
    """Check that an all-success replay is flagged as ceiling-insufficient.

    A baseline trial is materialized from a default study, the server
    process and replay are mocked so that every replayed request succeeds,
    and the ``run_trial`` result must report a completed trial with three
    best requests and a ``measurement`` section that marks
    ``measurement_ceiling_insufficient`` and carries an
    ``auto_high_resolution`` entry.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        spec_file = _write_study_assets(root)
        loaded = load_study_spec(spec_file)

        study_store = StudyStore(root / ".aituner" / "studies")
        study_store.init_study(spec_path=spec_file, study=loaded)
        current_state = study_store.load_state(loaded.study_id)

        baseline = Proposal.from_dict(
            {
                "observation": "baseline",
                "diagnosis": "baseline",
                "config_patch": {"env_patch": {}, "flag_patch": {}},
                "expected_effects": ["measure"],
            }
        )
        trial, _ = study_store.materialize_trial(
            study=loaded,
            state=current_state,
            proposal=baseline,
        )

        def fake_replay(requests, **kwargs):
            # Stand-in for aituner.worker._replay_requests: one successful
            # outcome per request, followed by False and "".
            # NOTE(review): the meaning of the last two tuple members is
            # defined by the real _replay_requests - confirm there.
            outcomes = []
            for request in requests:
                outcomes.append(
                    RequestOutcome(
                        request_id=request.row_id,
                        success=True,
                        ttft_ms=10.0,
                        tpot_ms=5.0,
                        prompt_tokens=request.prompt_tokens_hint,
                        completion_tokens=request.completion_tokens_hint,
                    )
                )
            return (outcomes, False, "")

        server_process = mock.Mock()
        server_process.poll.return_value = 0

        # Patches are entered left to right, matching the original nesting.
        popen_patch = mock.patch(
            "aituner.worker.subprocess.Popen",
            return_value=server_process,
        )
        wait_patch = mock.patch(
            "aituner.worker._wait_for_server_or_exit",
            return_value=None,
        )
        terminate_patch = mock.patch(
            "aituner.worker._terminate_process_tree",
            return_value=None,
        )
        replay_patch = mock.patch(
            "aituner.worker._replay_requests",
            side_effect=fake_replay,
        )
        with popen_patch, wait_patch, terminate_patch, replay_patch:
            result = run_trial(Path(trial.artifact_dir) / "trial_spec.json")

        measurement = result["measurement"]
        self.assertEqual(result["status"], "completed")
        self.assertEqual(result["best_request_count"], 3)
        self.assertTrue(measurement["measurement_ceiling_insufficient"])
        self.assertEqual(measurement["reason"], "measurement_ceiling_insufficient")
        self.assertIn("auto_high_resolution", measurement)
|
||||
|
||||
def test_run_trial_falls_back_below_inherited_search_floor(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user