Add advisory harness attribution and descriptor planner MVP

This commit is contained in:
2026-06-30 12:05:03 +08:00
parent 08429e5da8
commit adb5356c4b
11 changed files with 1066 additions and 9 deletions

View File

@@ -7410,6 +7410,231 @@ class CoreFlowTests(unittest.TestCase):
(store.study_root(study.study_id) / "harness" / "candidate-set-0002.json").exists()
)
def test_cli_tune_records_advisory_llm_out_of_set_candidate_family_gap(self) -> None:
    # Scenario: the harness candidate set only offers max-num-seqs 8 -> 16,
    # while the (mocked) LLM proposes max-num-seqs=24.  The test expects the
    # tune run to execute that proposal, tag it "llm_out_of_set", and write
    # both a proposal attribution record and a candidate-family gap record.
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(tmp_path)

        # Point the study at a custom LLM endpoint; the actual LLM call is
        # mocked further down.
        spec_payload = json.loads(study_path.read_text(encoding="utf-8"))
        llm_endpoint = {
            "provider": "custom",
            "base_url": "http://llm.example/v1",
            "wire_api": "chat.completions",
            "model": "test-model",
            "api_key_env": "OPENAI_API_KEY",
        }
        spec_payload["llm"]["endpoint"] = llm_endpoint
        study_path.write_text(json.dumps(spec_payload), encoding="utf-8")

        study = load_study_spec(study_path)
        store_root = tmp_path / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=study_path, study=study)

        # Seed the store with one completed trial at max-num-seqs=8 so the
        # next trial index is 2.
        completed_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            config_patch={
                "env_patch": {},
                "flag_patch": {"max-num-seqs": 8},
            },
        )
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0001",
            best_parallel_size=1,
            best_sampling_u=0.25,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            next_trial_index=2,
            trials=[completed_trial],
        )
        store.save_state(seeded_state)

        # The only eligible harness candidate steps max-num-seqs to 16.
        harness_candidate = {
            "candidate_id": "cand-mns16",
            "action_id": "coordinate_step:max-num-seqs:8->16",
            "knob_family": "max-num-seqs",
            "score": 0.8,
            "effective_config_fingerprint": "not-the-llm-proposal",
            "config_patch": {
                "env_patch": {},
                "flag_patch": {"max-num-seqs": 16},
            },
        }
        harness_context = {
            "experiment_plan": {
                "planner_version": "test",
                "candidate_set": {
                    "candidate_set_hash": "candidate-set-test",
                    "eligible_candidates": [harness_candidate],
                    "blocked_candidates": [],
                },
                "next_action": None,
            }
        }

        # The LLM answer uses the same knob but a value (24) outside the set.
        llm_proposal = {
            "observation": "Harness is in the right admission direction but too conservative.",
            "diagnosis": "Try a larger same-operator admission step.",
            "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
            "expected_effects": ["test whether admission capacity was underexplored"],
            "why_not_previous_failures": "new value and no launch failure evidence",
            "should_stop": False,
        }
        llm_payload = json.dumps(llm_proposal)

        def fake_run_trial(trial_spec_path: Path) -> dict[str, object]:
            # Stand-in for the real trial runner: reads the trial spec,
            # writes a canned "completed" result.json into the artifact
            # directory, and returns that same result.
            spec = json.loads(trial_spec_path.read_text(encoding="utf-8"))
            artifact_dir = Path(spec["artifact_dir"])
            outcome = {
                "study_id": spec["study_id"],
                "trial_id": spec["trial_id"],
                "status": "completed",
                "best_sampling_u": 0.5,
                "best_request_rate": 2.0,
                "best_pass_rate": 1.0,
                "best_request_count": 2,
                "probes": [],
            }
            (artifact_dir / "result.json").write_text(
                json.dumps(outcome), encoding="utf-8"
            )
            return outcome

        cli_args = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
            "--proposal-policy",
            "llm-first",
        ]
        captured_stdout = io.StringIO()

        # Contexts are entered in the same order as the original nesting.
        with contextlib.ExitStack() as active:
            active.enter_context(
                mock.patch(
                    "aituner.cli.build_harness_context",
                    return_value=harness_context,
                )
            )
            active.enter_context(
                mock.patch(
                    "aituner.llm.build_harness_context",
                    return_value=harness_context,
                )
            )
            active.enter_context(
                mock.patch(
                    "aituner.cli.call_llm_for_proposal",
                    return_value=llm_payload,
                )
            )
            active.enter_context(
                mock.patch("aituner.cli.run_trial", side_effect=fake_run_trial)
            )
            active.enter_context(contextlib.redirect_stdout(captured_stdout))
            exit_code = cli_main(cli_args)

        self.assertEqual(exit_code, 0)
        summary = json.loads(captured_stdout.getvalue())
        executed = summary["executed_trials"]
        self.assertEqual(executed[0]["proposal_origin"], "llm_out_of_set")
        self.assertTrue(executed[0]["candidate_family_gap_path"])

        # Attribution record written under the study root for proposal 2.
        study_root = store.study_root(study.study_id)
        attribution_path = study_root / "proposal_attributions" / "proposal-0002.json"
        attribution = json.loads(attribution_path.read_text(encoding="utf-8"))
        self.assertEqual(attribution["proposal_origin"], "llm_out_of_set")
        self.assertEqual(attribution["harness_candidate_policy"], "advisory")

        # Gap record referenced from the executed-trial summary.
        gap_path = Path(executed[0]["candidate_family_gap_path"])
        gap = json.loads(gap_path.read_text(encoding="utf-8"))
        self.assertEqual(gap["gap_type"], "same_operator_new_step")
        self.assertEqual(gap["review_status"], "pending")
        self.assertEqual(gap["changed_knobs"], ["flag:max-num-seqs"])
        self.assertEqual(gap["proposal_patch"]["flag_patch"]["max-num-seqs"], 24)
        self.assertEqual(
            gap["nearest_harness_candidates"][0]["candidate_id"], "cand-mns16"
        )
def test_cli_tune_strict_harness_policy_rejects_llm_out_of_set_proposal(self) -> None:
    # Scenario: llm.harness_candidate_policy is "strict" and the (mocked)
    # LLM proposes max-num-seqs=24, which is not in the harness candidate
    # set.  The test expects exit code 2, no trial execution, and an error
    # on stderr that mentions the strict policy setting.
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(tmp_path)

        # Enable strict policy and point the study at a custom LLM endpoint.
        spec_payload = json.loads(study_path.read_text(encoding="utf-8"))
        llm_section = spec_payload["llm"]
        llm_section["harness_candidate_policy"] = "strict"
        llm_section["endpoint"] = {
            "provider": "custom",
            "base_url": "http://llm.example/v1",
            "wire_api": "chat.completions",
            "model": "test-model",
            "api_key_env": "OPENAI_API_KEY",
        }
        study_path.write_text(json.dumps(spec_payload), encoding="utf-8")

        study = load_study_spec(study_path)
        store_root = tmp_path / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=study_path, study=study)

        # Seed one completed trial at max-num-seqs=8.
        completed_trial = TrialSummary(
            trial_id="trial-0001",
            status="completed",
            parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            config_patch={"env_patch": {}, "flag_patch": {"max-num-seqs": 8}},
        )
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0001",
            best_parallel_size=1,
            best_request_rate=1.0,
            best_request_rate_per_gpu=1.0,
            next_trial_index=2,
            trials=[completed_trial],
        )
        store.save_state(seeded_state)

        # The only eligible harness candidate sets max-num-seqs to 16.
        harness_candidate = {
            "candidate_id": "cand-mns16",
            "effective_config_fingerprint": "not-the-llm-proposal",
            "config_patch": {
                "env_patch": {},
                "flag_patch": {"max-num-seqs": 16},
            },
        }
        harness_context = {
            "experiment_plan": {
                "candidate_set": {
                    "candidate_set_hash": "candidate-set-test",
                    "eligible_candidates": [harness_candidate],
                }
            }
        }

        llm_proposal = {
            "observation": "Try an out-of-set candidate.",
            "diagnosis": "strict mode should reject this.",
            "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
            "expected_effects": ["should not run"],
            "why_not_previous_failures": "",
            "should_stop": False,
        }
        llm_payload = json.dumps(llm_proposal)

        cli_args = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "2",
            "--proposal-policy",
            "llm-first",
        ]
        stderr = io.StringIO()

        # Contexts are entered in the same order as the original nesting.
        with contextlib.ExitStack() as active:
            active.enter_context(
                mock.patch(
                    "aituner.cli.build_harness_context",
                    return_value=harness_context,
                )
            )
            active.enter_context(
                mock.patch(
                    "aituner.llm.build_harness_context",
                    return_value=harness_context,
                )
            )
            active.enter_context(
                mock.patch(
                    "aituner.cli.call_llm_for_proposal",
                    return_value=llm_payload,
                )
            )
            # Keep a handle on the run_trial mock to verify it is never used.
            run_trial_mock = active.enter_context(
                mock.patch("aituner.cli.run_trial")
            )
            active.enter_context(contextlib.redirect_stderr(stderr))
            exit_code = cli_main(cli_args)

        self.assertEqual(exit_code, 2)
        run_trial_mock.assert_not_called()
        self.assertIn("llm.harness_candidate_policy=strict", stderr.getvalue())
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
import unittest
from aituner.engine_adapters.vllm import default_vllm_descriptors
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
from aituner.mechanism_planner import coordinate_line_search_candidates
class MechanismPlannerTests(unittest.TestCase):
    # Tests for coordinate_line_search_candidates driven by knob descriptors.
    # Comments are used instead of docstrings so unittest's verbose test
    # descriptions stay unchanged.

    def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
        # Two knobs with different names but the same "admission_capacity"
        # mechanism are expected to yield the same 8 -> 16 step.
        vllm_descriptor = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]

        sglang_constraints = KnobConstraints(min_value=1, integer=True, multiple_of=8)
        sglang_effects = {
            "increase": ("admission_capacity",),
            "decrease": ("kv_memory_pressure",),
        }
        sglang_descriptor = KnobDescriptor(
            name="max-running-requests",
            location="flag",
            value_type="int",
            mechanisms=("admission_capacity", "kv_memory_pressure"),
            search_geometry="positive_capacity",
            operators=("coordinate_line_search",),
            constraints=sglang_constraints,
            directional_effects=sglang_effects,
        )

        # Each call gets its own evidence dict, as in the original.
        vllm_candidates = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(vllm_descriptor,),
            evidence_weights={"admission_capacity": 0.9},
        )
        sglang_candidates = coordinate_line_search_candidates(
            current_config={"max-running-requests": 8},
            descriptors=(sglang_descriptor,),
            evidence_weights={"admission_capacity": 0.9},
        )

        expected_vllm_patch = {"max-num-seqs": 16}
        expected_sglang_patch = {"max-running-requests": 16}
        self.assertEqual(vllm_candidates[0].patch, expected_vllm_patch)
        self.assertEqual(sglang_candidates[0].patch, expected_sglang_patch)
        self.assertEqual(vllm_candidates[0].mechanism, "admission_capacity")
        self.assertEqual(sglang_candidates[0].mechanism, "admission_capacity")

    def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
        # With kv_memory_pressure evidence, the top candidate is expected to
        # step max-num-seqs down from 64 to 32.
        (descriptor,) = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[:1]
        starting_config = {"max-num-seqs": 64}
        pressure_evidence = {"kv_memory_pressure": 0.8}

        candidates = coordinate_line_search_candidates(
            current_config=starting_config,
            descriptors=(descriptor,),
            evidence_weights=pressure_evidence,
        )

        self.assertEqual(candidates[0].direction, "decrease")
        self.assertEqual(candidates[0].patch, {"max-num-seqs": 32})

    def test_bounded_fraction_respects_constraints(self) -> None:
        # Starting at 0.98, the top candidate for gpu-memory-utilization is
        # expected to be exactly 1.0.
        (descriptor,) = default_vllm_descriptors(
            tunable_flags=("gpu-memory-utilization",)
        )[:1]
        starting_config = {"gpu-memory-utilization": 0.98}
        capacity_evidence = {"kv_memory_capacity": 0.8}

        candidates = coordinate_line_search_candidates(
            current_config=starting_config,
            descriptors=(descriptor,),
            evidence_weights=capacity_evidence,
        )

        self.assertEqual(candidates[0].patch, {"gpu-memory-utilization": 1.0})
# Run this module's tests via unittest when the file is executed directly.
if __name__ == "__main__":
unittest.main()