Veto repeated materialized configs
This commit is contained in:
@@ -15,6 +15,7 @@ from unittest import mock
|
||||
|
||||
from aituner.cli import main as cli_main
|
||||
from aituner.compare import _aggregate_summary, load_compare_spec, run_compare
|
||||
from aituner.config_signature import materialized_effective_config_signature
|
||||
from aituner.engine import build_launch_recipe
|
||||
from aituner.http_client import (
|
||||
HttpClientError,
|
||||
@@ -382,14 +383,101 @@ class CoreFlowTests(unittest.TestCase):
|
||||
study,
|
||||
{"env_patch": {}, "flag_patch": {"tensor-parallel-size": 8}},
|
||||
)
|
||||
noop_tp_string = _effective_config_signature(
|
||||
study,
|
||||
{"env_patch": {}, "flag_patch": {"tensor-parallel-size": "8"}},
|
||||
)
|
||||
changed_tp = _effective_config_signature(
|
||||
study,
|
||||
{"env_patch": {}, "flag_patch": {"tensor-parallel-size": 4}},
|
||||
)
|
||||
|
||||
self.assertEqual(baseline, noop_tp)
|
||||
self.assertEqual(baseline, noop_tp_string)
|
||||
self.assertNotEqual(baseline, changed_tp)
|
||||
|
||||
def test_materialized_signature_inherits_incumbent_topology(self) -> None:
    """Check a runtime-only patch and its explicit twin share one signature.

    The study state records ``trial-0002`` as the best trial, run with
    tensor-parallel-size 2, data-parallel-size 4 and max-num-seqs 160.
    One proposal patches only ``max-num-seqs``; the other spells out all
    three flags as strings. Both must produce the same value from
    ``materialized_effective_config_signature``.
    """
    parallel_choices = [1, 2, 4, 8]
    engine_overrides = {
        "base_flags": {
            "host": "127.0.0.1",
            "port": 8000,
            "tensor-parallel-size": 4,
            "data-parallel-size": 2,
            "max-num-seqs": 64,
        },
        "tunable_flags": [
            "tensor-parallel-size",
            "data-parallel-size",
            "max-num-seqs",
        ],
        "topology_constraints": {
            "allowed_tensor_parallel_sizes": list(parallel_choices),
            "allowed_data_parallel_sizes": list(parallel_choices),
            "allowed_tp_dp_products": list(parallel_choices),
        },
    }
    # Patch recorded for the best trial; its topology differs from base_flags.
    incumbent_patch = {
        "env_patch": {},
        "flag_patch": {
            "tensor-parallel-size": 2,
            "data-parallel-size": 4,
            "max-num-seqs": 160,
        },
    }
    runtime_only_payload = {
        "observation": "Try the same runtime cap.",
        "diagnosis": "This should materialize on incumbent topology.",
        "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 160}},
        "expected_effects": ["no-op after topology inheritance"],
    }
    # Same flags as the incumbent patch, but given as strings.
    explicit_payload = {
        "observation": "Explicit duplicate.",
        "diagnosis": "Same effective execution config.",
        "config_patch": {
            "env_patch": {},
            "flag_patch": {
                "tensor-parallel-size": "2",
                "data-parallel-size": "4",
                "max-num-seqs": "160",
            },
        },
        "expected_effects": ["same config"],
    }

    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        study_path = _write_study_assets(workdir, engine_overrides=engine_overrides)
        study = load_study_spec(study_path)

        state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            trials=[
                TrialSummary(
                    trial_id="trial-0002",
                    status="completed",
                    parallel_size=8,
                    config_patch=incumbent_patch,
                )
            ],
        )

        runtime_only = Proposal.from_dict(runtime_only_payload)
        explicit = Proposal.from_dict(explicit_payload)

        runtime_only_signature = materialized_effective_config_signature(
            study=study,
            state=state,
            proposal=runtime_only,
        )
        explicit_signature = materialized_effective_config_signature(
            study=study,
            state=state,
            proposal=explicit,
        )
        self.assertEqual(runtime_only_signature, explicit_signature)
|
||||
|
||||
def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
|
||||
window = WindowRecord(
|
||||
window_id="w1",
|
||||
@@ -6019,6 +6107,105 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertTrue(honored)
|
||||
self.assertEqual(honored[-1]["stop_authorized_by"], "llm_after_veto_budget")
|
||||
|
||||
def test_cli_tune_rejects_repeated_materialized_llm_config(self) -> None:
    """Check ``study tune`` refuses an LLM proposal that repeats a tested config.

    The stored state already holds completed ``trial-0002`` with
    tensor-parallel-size 2, data-parallel-size 4 and max-num-seqs 160.
    The mocked LLM response patches only ``max-num-seqs`` to 160. The CLI
    must exit with code 2, never call ``run_trial``, and write a message
    to stderr that names ``trial-0002``.
    """
    parallel_choices = [1, 2, 4, 8]
    engine_overrides = {
        "base_flags": {
            "host": "127.0.0.1",
            "port": 8000,
            "tensor-parallel-size": 4,
            "data-parallel-size": 2,
            "max-num-seqs": 64,
        },
        "tunable_flags": [
            "tensor-parallel-size",
            "data-parallel-size",
            "max-num-seqs",
        ],
        "topology_constraints": {
            "allowed_tensor_parallel_sizes": list(parallel_choices),
            "allowed_data_parallel_sizes": list(parallel_choices),
            "allowed_tp_dp_products": list(parallel_choices),
        },
    }
    llm_endpoint = {
        "provider": "custom",
        "base_url": "http://localhost:9/v1",
        "model": "test-model",
        "api_key_env": "AITUNER_TEST_KEY",
    }
    # Canned LLM reply: a runtime-only patch matching the incumbent's max-num-seqs.
    repeated_runtime_patch = json.dumps(
        {
            "observation": "Try the same runtime setting.",
            "diagnosis": "This is duplicate after topology inheritance.",
            "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 160}},
            "expected_effects": ["should be vetoed"],
            "why_not_previous_failures": "",
            "should_stop": False,
        }
    )

    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        study_path = _write_study_assets(workdir, engine_overrides=engine_overrides)

        # Rewrite the spec on disk so the LLM section uses a direct endpoint.
        spec = json.loads(study_path.read_text(encoding="utf-8"))
        spec["llm"]["use_harness"] = False
        spec["llm"]["endpoint"] = llm_endpoint
        study_path.write_text(json.dumps(spec), encoding="utf-8")

        study = load_study_spec(study_path)
        store_root = workdir / "store"
        store = StudyStore(store_root)
        store.init_study(spec_path=study_path, study=study)

        incumbent = TrialSummary(
            trial_id="trial-0002",
            status="completed",
            parallel_size=8,
            best_sampling_u=0.125,
            best_request_rate=3.0,
            best_request_rate_per_gpu=0.375,
            config_patch={
                "env_patch": {},
                "flag_patch": {
                    "tensor-parallel-size": 2,
                    "data-parallel-size": 4,
                    "max-num-seqs": 160,
                },
            },
        )
        seeded_state = StudyState(
            study_id=study.study_id,
            best_trial_id="trial-0002",
            best_parallel_size=8,
            best_sampling_u=0.125,
            best_request_rate=3.0,
            best_request_rate_per_gpu=0.375,
            next_trial_index=3,
            trials=[incumbent],
        )
        store.save_state(seeded_state)

        argv = [
            "study",
            "tune",
            "--spec",
            str(study_path),
            "--store-root",
            str(store_root),
            "--skip-baseline",
            "--max-trials",
            "3",
        ]
        stderr = io.StringIO()
        with mock.patch("aituner.cli.run_trial") as run_trial_mock:
            with mock.patch(
                "aituner.cli.call_llm_for_proposal",
                return_value=repeated_runtime_patch,
            ):
                with contextlib.redirect_stderr(stderr):
                    exit_code = cli_main(argv)

        self.assertEqual(exit_code, 2)
        run_trial_mock.assert_not_called()
        captured = stderr.getvalue()
        self.assertIn("repeats an already tested effective full config", captured)
        self.assertIn("trial-0002", captured)
|
||||
|
||||
def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user