Add harness early stop ablation
This commit is contained in:
@@ -13,7 +13,7 @@ from aituner.compare import load_compare_spec, run_compare
|
||||
from aituner.engine import build_launch_recipe
|
||||
from aituner.http_client import _auth_headers, _openai_url, _should_bypass_proxy
|
||||
from aituner.job import append_job, build_trial_job
|
||||
from aituner.harness import build_harness_context
|
||||
from aituner.harness import build_harness_context, build_harness_stop_proposal
|
||||
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
|
||||
from aituner.search import ThresholdProbe, binary_search_max_feasible
|
||||
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
|
||||
@@ -422,6 +422,119 @@ class CoreFlowTests(unittest.TestCase):
|
||||
)
|
||||
self.assertIn("validate", guard["recommended_next_action"])
|
||||
|
||||
def test_harness_stop_after_post_incumbent_validation_is_exhausted(self) -> None:
    """Check the harness stop signal for a study with two trials after the incumbent.

    The state names ``trial-0002`` as the best trial and lists two later
    completed trials (``trial-0003``, ``trial-0004``) that are given no
    request-rate fields.  The test asserts that the harness context reports
    ``should_stop`` with reason ``post_incumbent_validation_exhausted`` and
    that a stop proposal can be built from that context.
    """

    def completed_trial(trial_id, flag_patch, **metrics):
        # Every trial in this fixture is "completed" on 8 GPUs with an empty
        # env patch; only the id, flag patch and optional rates differ.
        return TrialSummary(
            trial_id=trial_id,
            status="completed",
            parallel_size=8,
            config_patch={"env_patch": {}, "flag_patch": flag_patch},
            **metrics,
        )

    with tempfile.TemporaryDirectory() as tmp:
        spec_file = _write_study_assets(Path(tmp))
        study = load_study_spec(spec_file)

        history = [
            completed_trial(
                "trial-0001",
                {},
                best_request_rate=0.8,
                best_request_rate_per_gpu=0.1,
            ),
            # Incumbent: matches best_trial_id / best_request_rate below.
            completed_trial(
                "trial-0002",
                {"tensor-parallel-size": 2, "data-parallel-size": 4},
                best_request_rate=2.4,
                best_request_rate_per_gpu=0.3,
            ),
            # Two follow-up trials recorded without any request-rate result.
            completed_trial(
                "trial-0003",
                {"tensor-parallel-size": 1, "data-parallel-size": 8},
            ),
            completed_trial("trial-0004", {"max-num-seqs": 160}),
        ]
        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_sampling_u=0.02,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            trials=history,
        )

        context = build_harness_context(
            study=study,
            window_summary={"prompt_tokens_p95": 2048},
            state=state,
        )
        stop_info = context["harness_stop"]
        self.assertTrue(stop_info["should_stop"])
        self.assertEqual(stop_info["reason"], "post_incumbent_validation_exhausted")

        proposal = build_harness_stop_proposal(context)
        self.assertIsNotNone(proposal)
        self.assertTrue(proposal.should_stop)
|
||||
|
||||
def test_harness_does_not_stop_immediately_after_strong_incumbent(self) -> None:
    """Check that no stop is signalled when the incumbent is the latest trial.

    The state holds only a baseline trial and the incumbent ``trial-0002``;
    no later trials are listed.  The test asserts that the harness context
    reports ``should_stop`` as false and that no stop proposal is produced.
    """
    with tempfile.TemporaryDirectory() as tmp:
        spec_file = _write_study_assets(Path(tmp))
        study = load_study_spec(spec_file)

        baseline = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            parallel_size=8,
            best_request_rate=0.8,
            best_request_rate_per_gpu=0.1,
            config_patch={"env_patch": {}, "flag_patch": {}},
        )
        incumbent_flags = {
            "tensor-parallel-size": 2,
            "data-parallel-size": 4,
        }
        incumbent = TrialSummary(
            trial_id="trial-0002",
            status="completed",
            parallel_size=8,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            config_patch={"env_patch": {}, "flag_patch": incumbent_flags},
        )
        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            trials=[baseline, incumbent],
        )

        context = build_harness_context(
            study=study,
            window_summary={"prompt_tokens_p95": 2048},
            state=state,
        )
        self.assertFalse(context["harness_stop"]["should_stop"])

        proposal = build_harness_stop_proposal(context)
        self.assertIsNone(proposal)
|
||||
|
||||
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
@@ -564,6 +677,26 @@ class CoreFlowTests(unittest.TestCase):
|
||||
"\n".join(context["proposal_rules"]),
|
||||
)
|
||||
|
||||
def test_prompt_can_disable_harness_for_ablation(self) -> None:
    """Check the prompt built from a spec whose ``llm.use_harness`` is false.

    The study spec on disk is rewritten with ``llm.use_harness = False``
    before loading.  The test asserts that the loaded study carries the flag,
    that the prompt contains the "disabled" and "without harness hints"
    markers, and that it does not contain a ``"paper_alignment"`` key.
    """
    with tempfile.TemporaryDirectory() as tmp:
        spec_file = _write_study_assets(Path(tmp))

        # Flip the ablation switch in the JSON spec on disk, then reload it.
        spec_doc = json.loads(spec_file.read_text(encoding="utf-8"))
        spec_doc["llm"]["use_harness"] = False
        spec_file.write_text(json.dumps(spec_doc), encoding="utf-8")
        study = load_study_spec(spec_file)

        trace_window, trace_requests = load_trace_requests(
            study, study_spec_path=spec_file
        )
        window_summary = summarize_window(trace_requests, trace_window)
        fresh_state = StudyState(study_id=study.study_id)
        prompt = build_prompt(
            study=study,
            window_summary=window_summary,
            state=fresh_state,
            capability_profile=None,
        )

        self.assertFalse(study.llm.use_harness)
        for expected_fragment in (
            "Disabled by llm.use_harness=false",
            "without harness hints",
        ):
            self.assertIn(expected_fragment, prompt)
        self.assertNotIn('"paper_alignment"', prompt)
|
||||
|
||||
def test_harness_uses_prior_infeasible_probe_for_active_bottleneck(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
@@ -2299,6 +2432,98 @@ class CoreFlowTests(unittest.TestCase):
|
||||
state = store.load_state("study-1")
|
||||
self.assertEqual(state.next_trial_index, 1)
|
||||
|
||||
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
    """Check that ``study tune`` writes a harness stop proposal without calling the LLM.

    A store is seeded with a state whose incumbent is ``trial-0002``, followed
    by two completed trials without request-rate fields, and
    ``next_trial_index=5``.  With the LLM call and trial runner patched out,
    the test asserts that the CLI exits with 0, that neither mock was called,
    and that ``proposals/harness-stop-0005.json`` exists with a truthy
    ``should_stop``.
    """

    def completed_trial(trial_id, flag_patch, **metrics):
        # Every trial in this fixture is "completed" on 8 GPUs with an empty
        # env patch; only the id, flag patch and optional rates differ.
        return TrialSummary(
            trial_id=trial_id,
            status="completed",
            parallel_size=8,
            config_patch={"env_patch": {}, "flag_patch": flag_patch},
            **metrics,
        )

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(tmp_path)
        study = load_study_spec(study_path)
        store_root = tmp_path / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=study_path, study=study)

        history = [
            completed_trial(
                "trial-0001",
                {},
                best_request_rate=0.8,
                best_request_rate_per_gpu=0.1,
            ),
            # Incumbent: matches best_trial_id / best_request_rate below.
            completed_trial(
                "trial-0002",
                {"tensor-parallel-size": 2, "data-parallel-size": 4},
                best_request_rate=2.4,
                best_request_rate_per_gpu=0.3,
            ),
            # Two follow-up trials recorded without any request-rate result.
            completed_trial(
                "trial-0003",
                {"tensor-parallel-size": 1, "data-parallel-size": 8},
            ),
            completed_trial("trial-0004", {"max-num-seqs": 160}),
        ]
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_sampling_u=0.02,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            next_trial_index=5,
            trials=history,
        )
        store.save_state(seeded_state)

        argv = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--max-trials",
            "1",
        ]
        # Patch both the LLM call and the trial runner so any use is recorded.
        with mock.patch("aituner.cli.call_llm_for_proposal") as llm_mock, mock.patch(
            "aituner.cli.run_trial"
        ) as run_trial_mock:
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        llm_mock.assert_not_called()
        run_trial_mock.assert_not_called()

        # The file name carries the seeded next_trial_index (5) zero-padded.
        proposals_dir = store.study_root(study.study_id) / "proposals"
        proposal_path = proposals_dir / "harness-stop-0005.json"
        self.assertTrue(proposal_path.exists())
        proposal = json.loads(proposal_path.read_text(encoding="utf-8"))
        self.assertTrue(proposal["should_stop"])
|
||||
|
||||
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user