Veto repeated materialized configs

2026-06-26 22:15:47 +08:00
parent 825d3e03e9
commit 5080b50315
6 changed files with 383 additions and 66 deletions
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -15,6 +15,7 @@ from unittest import mock

 from aituner.cli import main as cli_main
 from aituner.compare import _aggregate_summary, load_compare_spec, run_compare
+from aituner.config_signature import materialized_effective_config_signature
 from aituner.engine import build_launch_recipe
 from aituner.http_client import (
    HttpClientError,
@@ -382,14 +383,101 @@ class CoreFlowTests(unittest.TestCase):
                study,
                {"env_patch": {}, "flag_patch": {"tensor-parallel-size": 8}},
            )
+            noop_tp_string = _effective_config_signature(
+                study,
+                {"env_patch": {}, "flag_patch": {"tensor-parallel-size": "8"}},
+            )
            changed_tp = _effective_config_signature(
                study,
                {"env_patch": {}, "flag_patch": {"tensor-parallel-size": 4}},
            )

            self.assertEqual(baseline, noop_tp)
+            self.assertEqual(baseline, noop_tp_string)
            self.assertNotEqual(baseline, changed_tp)

+    def test_materialized_signature_inherits_incumbent_topology(self) -> None:  # a partial patch and a fully explicit patch must share one signature
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(
+                tmp_path,
+                engine_overrides={
+                    "base_flags": {  # study baseline: TP=4, DP=2, max-num-seqs=64
+                        "host": "127.0.0.1",
+                        "port": 8000,
+                        "tensor-parallel-size": 4,
+                        "data-parallel-size": 2,
+                        "max-num-seqs": 64,
+                    },
+                    "tunable_flags": [
+                        "tensor-parallel-size",
+                        "data-parallel-size",
+                        "max-num-seqs",
+                    ],
+                    "topology_constraints": {
+                        "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
+                        "allowed_data_parallel_sizes": [1, 2, 4, 8],
+                        "allowed_tp_dp_products": [1, 2, 4, 8],
+                    },
+                },
+            )
+            study = load_study_spec(study_path)
+            state = StudyState(  # in-memory state only; its best trial is trial-0002
+                study_id=study.study_id,
+                best_trial_id="trial-0002",
+                best_parallel_size=8,
+                trials=[
+                    TrialSummary(
+                        trial_id="trial-0002",
+                        status="completed",
+                        parallel_size=8,
+                        config_patch={  # every value here differs from the matching base flag above
+                            "env_patch": {},
+                            "flag_patch": {
+                                "tensor-parallel-size": 2,
+                                "data-parallel-size": 4,
+                                "max-num-seqs": 160,
+                            },
+                        },
+                    )
+                ],
+            )
+            runtime_only = Proposal.from_dict(  # patches only max-num-seqs; TP/DP are left unspecified
+                {
+                    "observation": "Try the same runtime cap.",
+                    "diagnosis": "This should materialize on incumbent topology.",
+                    "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 160}},
+                    "expected_effects": ["no-op after topology inheritance"],
+                }
+            )
+            explicit = Proposal.from_dict(  # spells out the best trial's three flag values, as strings instead of ints
+                {
+                    "observation": "Explicit duplicate.",
+                    "diagnosis": "Same effective execution config.",
+                    "config_patch": {
+                        "env_patch": {},
+                        "flag_patch": {
+                            "tensor-parallel-size": "2",
+                            "data-parallel-size": "4",
+                            "max-num-seqs": "160",
+                        },
+                    },
+                    "expected_effects": ["same config"],
+                }
+            )
+            self.assertEqual(  # NOTE(review): equality presumably relies on TP/DP being filled from the best trial and on int/str normalization - confirm in aituner.config_signature
+                materialized_effective_config_signature(
+                    study=study,
+                    state=state,
+                    proposal=runtime_only,
+                ),
+                materialized_effective_config_signature(
+                    study=study,
+                    state=state,
+                    proposal=explicit,
+                ),
+            )
+
    def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
        window = WindowRecord(
            window_id="w1",
@@ -6019,6 +6107,105 @@ class CoreFlowTests(unittest.TestCase):
            self.assertTrue(honored)
            self.assertEqual(honored[-1]["stop_authorized_by"], "llm_after_veto_budget")

+    def test_cli_tune_rejects_repeated_materialized_llm_config(self) -> None:  # `study tune` must refuse an LLM proposal that repeats a stored trial's config
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(
+                tmp_path,
+                engine_overrides={
+                    "base_flags": {  # study baseline: TP=4, DP=2, max-num-seqs=64
+                        "host": "127.0.0.1",
+                        "port": 8000,
+                        "tensor-parallel-size": 4,
+                        "data-parallel-size": 2,
+                        "max-num-seqs": 64,
+                    },
+                    "tunable_flags": [
+                        "tensor-parallel-size",
+                        "data-parallel-size",
+                        "max-num-seqs",
+                    ],
+                    "topology_constraints": {
+                        "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
+                        "allowed_data_parallel_sizes": [1, 2, 4, 8],
+                        "allowed_tp_dp_products": [1, 2, 4, 8],
+                    },
+                },
+            )
+            spec = json.loads(study_path.read_text(encoding="utf-8"))  # rewrite the generated spec on disk before loading it
+            spec["llm"]["use_harness"] = False
+            spec["llm"]["endpoint"] = {
+                "provider": "custom",
+                "base_url": "http://localhost:9/v1",
+                "model": "test-model",
+                "api_key_env": "AITUNER_TEST_KEY",
+            }
+            study_path.write_text(json.dumps(spec), encoding="utf-8")
+            study = load_study_spec(study_path)
+            store_root = tmp_path / "store"
+            store = StudyStore(store_root)
+            store.init_study(spec_path=study_path, study=study)
+            store.save_state(  # persist a state where trial-0002 is already completed and recorded as best
+                StudyState(
+                    study_id=study.study_id,
+                    best_trial_id="trial-0002",
+                    best_parallel_size=8,
+                    best_sampling_u=0.125,
+                    best_request_rate=3.0,
+                    best_request_rate_per_gpu=0.375,
+                    next_trial_index=3,
+                    trials=[
+                        TrialSummary(
+                            trial_id="trial-0002",
+                            status="completed",
+                            parallel_size=8,
+                            best_sampling_u=0.125,
+                            best_request_rate=3.0,
+                            best_request_rate_per_gpu=0.375,
+                            config_patch={  # stored trial ran TP=2, DP=4, max-num-seqs=160
+                                "env_patch": {},
+                                "flag_patch": {
+                                    "tensor-parallel-size": 2,
+                                    "data-parallel-size": 4,
+                                    "max-num-seqs": 160,
+                                },
+                            },
+                        )
+                    ],
+                )
+            )
+            repeated_runtime_patch = json.dumps(  # canned LLM reply: sets only max-num-seqs=160, the stored trial's value
+                {
+                    "observation": "Try the same runtime setting.",
+                    "diagnosis": "This is duplicate after topology inheritance.",
+                    "config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 160}},
+                    "expected_effects": ["should be vetoed"],
+                    "why_not_previous_failures": "",
+                    "should_stop": False,
+                }
+            )
+            stderr = io.StringIO()
+            with mock.patch("aituner.cli.run_trial") as run_trial_mock, mock.patch(  # run_trial is mocked so any call can be detected
+                "aituner.cli.call_llm_for_proposal", return_value=repeated_runtime_patch
+            ), contextlib.redirect_stderr(stderr):
+                exit_code = cli_main(
+                    [
+                        "study",
+                        "tune",
+                        "--spec",
+                        str(study_path),
+                        "--store-root",
+                        str(store_root),
+                        "--skip-baseline",
+                        "--max-trials",
+                        "3",
+                    ]
+                )
+            self.assertEqual(exit_code, 2)  # the CLI must exit with status 2
+            run_trial_mock.assert_not_called()  # the proposal must be rejected before any trial runs
+            self.assertIn("repeats an already tested effective full config", stderr.getvalue())
+            self.assertIn("trial-0002", stderr.getvalue())  # the message must name the stored trial
+
    def test_cli_tune_uses_harness_stop_before_llm(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)