Add advisory harness attribution and descriptor planner MVP
This commit is contained in:
@@ -7410,6 +7410,231 @@ class CoreFlowTests(unittest.TestCase):
|
||||
(store.study_root(study.study_id) / "harness" / "candidate-set-0002.json").exists()
|
||||
)
|
||||
|
||||
def test_cli_tune_records_advisory_llm_out_of_set_candidate_family_gap(self) -> None:
    # Scenario: the study store already holds one completed trial at
    # max-num-seqs=8, the (mocked) harness offers only a max-num-seqs=16
    # candidate, and the (mocked) LLM proposes max-num-seqs=24 instead.
    # The spec does not set llm.harness_candidate_policy; the test expects
    # the run to succeed, the attribution to report the "advisory" policy,
    # and a candidate-family-gap record to be written for the proposal.
    llm_endpoint = {
        "provider": "custom",
        "base_url": "http://llm.example/v1",
        "wire_api": "chat.completions",
        "model": "test-model",
        "api_key_env": "OPENAI_API_KEY",
    }
    harness_candidate = {
        "candidate_id": "cand-mns16",
        "action_id": "coordinate_step:max-num-seqs:8->16",
        "knob_family": "max-num-seqs",
        "score": 0.8,
        "effective_config_fingerprint": "not-the-llm-proposal",
        "config_patch": {
            "env_patch": {},
            "flag_patch": {"max-num-seqs": 16},
        },
    }
    harness_context = {
        "experiment_plan": {
            "planner_version": "test",
            "candidate_set": {
                "candidate_set_hash": "candidate-set-test",
                "eligible_candidates": [harness_candidate],
                "blocked_candidates": [],
            },
            "next_action": None,
        }
    }
    llm_proposal = {
        "observation": "Harness is in the right admission direction but too conservative.",
        "diagnosis": "Try a larger same-operator admission step.",
        "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
        "expected_effects": ["test whether admission capacity was underexplored"],
        "why_not_previous_failures": "new value and no launch failure evidence",
        "should_stop": False,
    }
    llm_payload = json.dumps(llm_proposal)

    def fake_run_trial(trial_spec_path: Path) -> dict[str, object]:
        # Stand-in for aituner.cli.run_trial: reads the trial spec, writes a
        # "completed" result.json into the trial's artifact dir and returns it.
        spec = json.loads(trial_spec_path.read_text(encoding="utf-8"))
        artifact_root = Path(spec["artifact_dir"])
        outcome = {
            "study_id": spec["study_id"],
            "trial_id": spec["trial_id"],
            "status": "completed",
            "best_sampling_u": 0.5,
            "best_request_rate": 2.0,
            "best_pass_rate": 1.0,
            "best_request_count": 2,
            "probes": [],
        }
        result_file = artifact_root / "result.json"
        result_file.write_text(json.dumps(outcome), encoding="utf-8")
        return outcome

    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        spec_path = _write_study_assets(workdir)

        # Rewrite the generated spec so it carries an LLM endpoint.
        spec_payload = json.loads(spec_path.read_text(encoding="utf-8"))
        spec_payload["llm"]["endpoint"] = llm_endpoint
        spec_path.write_text(json.dumps(spec_payload), encoding="utf-8")

        study = load_study_spec(spec_path)
        store_root = workdir / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=spec_path, study=study)

        # Seed the store with a single completed trial so the next one is #2.
        prior_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            config_patch={
                "env_patch": {},
                "flag_patch": {"max-num-seqs": 8},
            },
        )
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0001",
            best_parallel_size=1,
            best_sampling_u=0.25,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            next_trial_index=2,
            trials=[prior_trial],
        )
        store.save_state(seeded_state)

        argv = [
            "study",
            "tune",
            "--spec",
            str(spec_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
            "--proposal-policy",
            "llm-first",
        ]

        captured = io.StringIO()
        with contextlib.ExitStack() as stack:
            # Patches are entered in the same order as the original nesting.
            stack.enter_context(
                mock.patch("aituner.cli.build_harness_context", return_value=harness_context)
            )
            stack.enter_context(
                mock.patch("aituner.llm.build_harness_context", return_value=harness_context)
            )
            stack.enter_context(
                mock.patch("aituner.cli.call_llm_for_proposal", return_value=llm_payload)
            )
            stack.enter_context(
                mock.patch("aituner.cli.run_trial", side_effect=fake_run_trial)
            )
            stack.enter_context(contextlib.redirect_stdout(captured))
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)

        # The CLI prints a JSON summary on stdout.
        summary = json.loads(captured.getvalue())
        first_executed = summary["executed_trials"][0]
        self.assertEqual(first_executed["proposal_origin"], "llm_out_of_set")
        self.assertTrue(first_executed["candidate_family_gap_path"])

        # Attribution record for proposal #2 lives under the study root.
        study_root = store.study_root(study.study_id)
        attribution_file = study_root / "proposal_attributions" / "proposal-0002.json"
        attribution = json.loads(attribution_file.read_text(encoding="utf-8"))
        self.assertEqual(attribution["proposal_origin"], "llm_out_of_set")
        self.assertEqual(attribution["harness_candidate_policy"], "advisory")

        # The gap record describes how the proposal differs from the harness set.
        gap_file = Path(first_executed["candidate_family_gap_path"])
        gap = json.loads(gap_file.read_text(encoding="utf-8"))
        self.assertEqual(gap["gap_type"], "same_operator_new_step")
        self.assertEqual(gap["review_status"], "pending")
        self.assertEqual(gap["changed_knobs"], ["flag:max-num-seqs"])
        self.assertEqual(gap["proposal_patch"]["flag_patch"]["max-num-seqs"], 24)
        self.assertEqual(gap["nearest_harness_candidates"][0]["candidate_id"], "cand-mns16")
|
||||
|
||||
def test_cli_tune_strict_harness_policy_rejects_llm_out_of_set_proposal(self) -> None:
    # Scenario: same setup as the advisory case (prior trial at
    # max-num-seqs=8, harness offers 16, LLM proposes 24), but the spec sets
    # llm.harness_candidate_policy to "strict". The test expects exit code 2,
    # no call to run_trial, and the policy named in the stderr output.
    llm_endpoint = {
        "provider": "custom",
        "base_url": "http://llm.example/v1",
        "wire_api": "chat.completions",
        "model": "test-model",
        "api_key_env": "OPENAI_API_KEY",
    }
    harness_candidate = {
        "candidate_id": "cand-mns16",
        "effective_config_fingerprint": "not-the-llm-proposal",
        "config_patch": {
            "env_patch": {},
            "flag_patch": {"max-num-seqs": 16},
        },
    }
    harness_context = {
        "experiment_plan": {
            "candidate_set": {
                "candidate_set_hash": "candidate-set-test",
                "eligible_candidates": [harness_candidate],
            }
        }
    }
    llm_proposal = {
        "observation": "Try an out-of-set candidate.",
        "diagnosis": "strict mode should reject this.",
        "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
        "expected_effects": ["should not run"],
        "why_not_previous_failures": "",
        "should_stop": False,
    }
    llm_payload = json.dumps(llm_proposal)

    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        spec_path = _write_study_assets(workdir)

        # Rewrite the generated spec: strict candidate policy plus an endpoint.
        spec_payload = json.loads(spec_path.read_text(encoding="utf-8"))
        spec_payload["llm"]["harness_candidate_policy"] = "strict"
        spec_payload["llm"]["endpoint"] = llm_endpoint
        spec_path.write_text(json.dumps(spec_payload), encoding="utf-8")

        study = load_study_spec(spec_path)
        store_root = workdir / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=spec_path, study=study)

        # Seed the store with a single completed trial so the next one is #2.
        prior_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            config_patch={"env_patch": {}, "flag_patch": {"max-num-seqs": 8}},
        )
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0001",
            best_parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            next_trial_index=2,
            trials=[prior_trial],
        )
        store.save_state(seeded_state)

        argv = [
            "study",
            "tune",
            "--spec",
            str(spec_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
            "--proposal-policy",
            "llm-first",
        ]

        captured_err = io.StringIO()
        with contextlib.ExitStack() as stack:
            # Patches are entered in the same order as the original nesting.
            stack.enter_context(
                mock.patch("aituner.cli.build_harness_context", return_value=harness_context)
            )
            stack.enter_context(
                mock.patch("aituner.llm.build_harness_context", return_value=harness_context)
            )
            stack.enter_context(
                mock.patch("aituner.cli.call_llm_for_proposal", return_value=llm_payload)
            )
            run_trial_mock = stack.enter_context(mock.patch("aituner.cli.run_trial"))
            stack.enter_context(contextlib.redirect_stderr(captured_err))
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 2)
        run_trial_mock.assert_not_called()
        self.assertIn("llm.harness_candidate_policy=strict", captured_err.getvalue())
|
||||
|
||||
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
tests/test_mechanism_planner.py
Normal file (new, 68 lines added)
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
|
||||
from aituner.engine_adapters.vllm import default_vllm_descriptors
|
||||
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
|
||||
from aituner.mechanism_planner import coordinate_line_search_candidates
|
||||
|
||||
|
||||
class MechanismPlannerTests(unittest.TestCase):
    """Checks for ``coordinate_line_search_candidates``.

    Each test feeds a single knob descriptor, a current config value and a
    one-entry ``evidence_weights`` mapping, then inspects the first returned
    candidate.
    """

    def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
        # Two differently named knobs (the default vLLM "max-num-seqs"
        # descriptor and a hand-built "max-running-requests" descriptor) are
        # given the same evidence; both are expected to step 8 -> 16 and to be
        # attributed to the "admission_capacity" mechanism.
        vllm_knob = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]
        sglang_spec = {
            "name": "max-running-requests",
            "location": "flag",
            "value_type": "int",
            "mechanisms": ("admission_capacity", "kv_memory_pressure"),
            "search_geometry": "positive_capacity",
            "operators": ("coordinate_line_search",),
            "constraints": KnobConstraints(min_value=1, integer=True, multiple_of=8),
            "directional_effects": {
                "increase": ("admission_capacity",),
                "decrease": ("kv_memory_pressure",),
            },
        }
        sglang_knob = KnobDescriptor(**sglang_spec)

        from_vllm = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(vllm_knob,),
            evidence_weights={"admission_capacity": 0.9},
        )
        from_sglang = coordinate_line_search_candidates(
            current_config={"max-running-requests": 8},
            descriptors=(sglang_knob,),
            evidence_weights={"admission_capacity": 0.9},
        )

        self.assertEqual(from_vllm[0].patch, {"max-num-seqs": 16})
        self.assertEqual(from_sglang[0].patch, {"max-running-requests": 16})
        self.assertEqual(from_vllm[0].mechanism, "admission_capacity")
        self.assertEqual(from_sglang[0].mechanism, "admission_capacity")

    def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
        # With only "kv_memory_pressure" evidence, the first candidate for
        # max-num-seqs=64 is expected to be a decrease to 32.
        knob = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]

        proposed = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 64},
            descriptors=(knob,),
            evidence_weights={"kv_memory_pressure": 0.8},
        )

        top = proposed[0]
        self.assertEqual(top.direction, "decrease")
        self.assertEqual(top.patch, {"max-num-seqs": 32})

    def test_bounded_fraction_respects_constraints(self) -> None:
        # Starting from gpu-memory-utilization=0.98 with "kv_memory_capacity"
        # evidence, the first candidate is expected to land exactly on 1.0.
        knob = default_vllm_descriptors(tunable_flags=("gpu-memory-utilization",))[0]

        proposed = coordinate_line_search_candidates(
            current_config={"gpu-memory-utilization": 0.98},
            descriptors=(knob,),
            evidence_weights={"kv_memory_capacity": 0.8},
        )

        top = proposed[0]
        self.assertEqual(top.patch, {"gpu-memory-utilization": 1.0})
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Lets the module be executed directly; unittest discovers the cases here.
    unittest.main()
|
||||
Reference in New Issue
Block a user