Add harness early stop ablation

This commit is contained in:
2026-05-02 08:08:14 +08:00
parent 6d3459c82d
commit 1a3d628268
9 changed files with 837 additions and 29 deletions

View File

@@ -13,7 +13,7 @@ from aituner.compare import load_compare_spec, run_compare
from aituner.engine import build_launch_recipe
from aituner.http_client import _auth_headers, _openai_url, _should_bypass_proxy
from aituner.job import append_job, build_trial_job
from aituner.harness import build_harness_context
from aituner.harness import build_harness_context, build_harness_stop_proposal
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -422,6 +422,119 @@ class CoreFlowTests(unittest.TestCase):
)
self.assertIn("validate", guard["recommended_next_action"])
def test_harness_stop_after_post_incumbent_validation_is_exhausted(self) -> None:
    """Harness flags a stop once every post-incumbent validation trial has completed.

    The incumbent (trial-0002) is followed by two completed validation
    trials, so the harness should report `should_stop` with the
    `post_incumbent_validation_exhausted` reason and produce a stop proposal.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        spec_path = _write_study_assets(workdir)
        study = load_study_spec(spec_path)
        # One pre-incumbent trial, the incumbent, then two validation trials.
        completed_trials = [
            TrialSummary(
                trial_id="trial-0001",
                status="completed",
                parallel_size=8,
                best_request_rate=0.8,
                best_request_rate_per_gpu=0.1,
                config_patch={"env_patch": {}, "flag_patch": {}},
            ),
            TrialSummary(
                trial_id="trial-0002",
                status="completed",
                parallel_size=8,
                best_request_rate=2.4,
                best_request_rate_per_gpu=0.3,
                config_patch={
                    "env_patch": {},
                    "flag_patch": {
                        "tensor-parallel-size": 2,
                        "data-parallel-size": 4,
                    },
                },
            ),
            TrialSummary(
                trial_id="trial-0003",
                status="completed",
                parallel_size=8,
                config_patch={
                    "env_patch": {},
                    "flag_patch": {
                        "tensor-parallel-size": 1,
                        "data-parallel-size": 8,
                    },
                },
            ),
            TrialSummary(
                trial_id="trial-0004",
                status="completed",
                parallel_size=8,
                config_patch={
                    "env_patch": {},
                    "flag_patch": {"max-num-seqs": 160},
                },
            ),
        ]
        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_sampling_u=0.02,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            trials=completed_trials,
        )
        context = build_harness_context(
            study=study,
            window_summary={"prompt_tokens_p95": 2048},
            state=state,
        )
        stop_info = context["harness_stop"]
        self.assertTrue(stop_info["should_stop"])
        self.assertEqual(stop_info["reason"], "post_incumbent_validation_exhausted")
        proposal = build_harness_stop_proposal(context)
        self.assertIsNotNone(proposal)
        self.assertTrue(proposal.should_stop)
def test_harness_does_not_stop_immediately_after_strong_incumbent(self) -> None:
    """Harness keeps exploring right after a strong incumbent appears.

    With only the incumbent (trial-0002) and one earlier trial completed —
    no post-incumbent validation trials yet — the harness must not stop,
    and no stop proposal is produced.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        spec_path = _write_study_assets(workdir)
        study = load_study_spec(spec_path)
        history = [
            TrialSummary(
                trial_id="trial-0001",
                status="completed",
                parallel_size=8,
                best_request_rate=0.8,
                best_request_rate_per_gpu=0.1,
                config_patch={"env_patch": {}, "flag_patch": {}},
            ),
            TrialSummary(
                trial_id="trial-0002",
                status="completed",
                parallel_size=8,
                best_request_rate=2.4,
                best_request_rate_per_gpu=0.3,
                config_patch={
                    "env_patch": {},
                    "flag_patch": {
                        "tensor-parallel-size": 2,
                        "data-parallel-size": 4,
                    },
                },
            ),
        ]
        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            trials=history,
        )
        context = build_harness_context(
            study=study,
            window_summary={"prompt_tokens_p95": 2048},
            state=state,
        )
        self.assertFalse(context["harness_stop"]["should_stop"])
        self.assertIsNone(build_harness_stop_proposal(context))
def test_trace_input_length_filter_keeps_only_matching_rows(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
@@ -564,6 +677,26 @@ class CoreFlowTests(unittest.TestCase):
"\n".join(context["proposal_rules"]),
)
def test_prompt_can_disable_harness_for_ablation(self) -> None:
    """Setting llm.use_harness=false removes harness hints from the prompt.

    Rewrites the study spec on disk with the harness disabled, rebuilds the
    prompt, and checks the prompt carries the ablation wording instead of
    harness-specific sections such as "paper_alignment".
    """
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        spec_path = _write_study_assets(root)
        # Flip the harness switch directly in the serialized spec.
        spec_payload = json.loads(spec_path.read_text(encoding="utf-8"))
        spec_payload["llm"]["use_harness"] = False
        spec_path.write_text(json.dumps(spec_payload), encoding="utf-8")
        study = load_study_spec(spec_path)
        window, requests = load_trace_requests(study, study_spec_path=spec_path)
        prompt = build_prompt(
            study=study,
            window_summary=summarize_window(requests, window),
            state=StudyState(study_id=study.study_id),
            capability_profile=None,
        )
        self.assertFalse(study.llm.use_harness)
        self.assertIn("Disabled by llm.use_harness=false", prompt)
        self.assertNotIn('"paper_alignment"', prompt)
        self.assertIn("without harness hints", prompt)
def test_harness_uses_prior_infeasible_probe_for_active_bottleneck(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
@@ -2299,6 +2432,98 @@ class CoreFlowTests(unittest.TestCase):
state = store.load_state("study-1")
self.assertEqual(state.next_trial_index, 1)
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
    """`study tune` honors a harness stop decision before consulting the LLM.

    Seeds the store with a state whose post-incumbent validation trials are
    all complete, then runs the CLI: the LLM and trial runner must never be
    invoked, and a harness-stop proposal file must be written instead.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        spec_path = _write_study_assets(workdir)
        study = load_study_spec(spec_path)
        store = StudyStore(workdir / "store")
        store.init_study(spec_path=spec_path, study=study)
        # State mirrors the exhausted-validation scenario: incumbent plus
        # two completed validation trials, next trial index at 5.
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_sampling_u=0.02,
            best_request_rate=2.4,
            best_request_rate_per_gpu=0.3,
            next_trial_index=5,
            trials=[
                TrialSummary(
                    trial_id="trial-0001",
                    status="completed",
                    parallel_size=8,
                    best_request_rate=0.8,
                    best_request_rate_per_gpu=0.1,
                    config_patch={"env_patch": {}, "flag_patch": {}},
                ),
                TrialSummary(
                    trial_id="trial-0002",
                    status="completed",
                    parallel_size=8,
                    best_request_rate=2.4,
                    best_request_rate_per_gpu=0.3,
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {
                            "tensor-parallel-size": 2,
                            "data-parallel-size": 4,
                        },
                    },
                ),
                TrialSummary(
                    trial_id="trial-0003",
                    status="completed",
                    parallel_size=8,
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {
                            "tensor-parallel-size": 1,
                            "data-parallel-size": 8,
                        },
                    },
                ),
                TrialSummary(
                    trial_id="trial-0004",
                    status="completed",
                    parallel_size=8,
                    config_patch={
                        "env_patch": {},
                        "flag_patch": {"max-num-seqs": 160},
                    },
                ),
            ],
        )
        store.save_state(seeded_state)
        cli_args = [
            "study",
            "tune",
            "--spec",
            str(spec_path),
            "--store-root",
            str(workdir / "store"),
            "--max-trials",
            "1",
        ]
        with mock.patch("aituner.cli.call_llm_for_proposal") as llm_mock, mock.patch(
            "aituner.cli.run_trial"
        ) as run_trial_mock:
            exit_code = cli_main(cli_args)
        self.assertEqual(exit_code, 0)
        llm_mock.assert_not_called()
        run_trial_mock.assert_not_called()
        proposal_path = (
            store.study_root(study.study_id)
            / "proposals"
            / "harness-stop-0005.json"
        )
        self.assertTrue(proposal_path.exists())
        proposal = json.loads(proposal_path.read_text(encoding="utf-8"))
        self.assertTrue(proposal["should_stop"])
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)